Source code for psi_io.psi_io

   1"""
   2Routines for reading and writing PSI-style HDF5 and HDF4 data files.
   3
   4This module provides a unified interface for interacting with PSI's HDF data
   5ecosystem.  It handles both HDF4 (``.hdf``) and HDF5 (``.h5``) file formats,
   6automatically dispatching to the appropriate backend based on the file
   7extension.
   8
   9Key interfaces
  10--------------
  11Reading full datasets:
  12    :func:`read_hdf_data`, :func:`rdhdf_1d`, :func:`rdhdf_2d`, :func:`rdhdf_3d`
  13
  14Writing full datasets:
  15    :func:`write_hdf_data`, :func:`wrhdf_1d`, :func:`wrhdf_2d`, :func:`wrhdf_3d`
  16
  17Reading file metadata:
  18    :func:`read_hdf_meta`, :func:`read_rtp_meta`
  19
  20Reading dataset subsets:
  21    :func:`get_scales_1d`, :func:`get_scales_2d`, :func:`get_scales_3d`,
  22    :func:`read_hdf_by_index`, :func:`read_hdf_by_value`, :func:`read_hdf_by_ivalue`
  23
  24Interpolating data:
  25    :func:`np_interpolate_slice_from_hdf`, :func:`sp_interpolate_slice_from_hdf`,
  26    :func:`interpolate_positions_from_hdf`
  27
  28Converting between formats:
  29    :func:`convert`, :func:`convert_psih4_to_psih5`
  30
  31See Also
  32--------
  33:mod:`psi_io.data` :
  34    Helpers for fetching example HDF data files.
  35
  36Examples
  37--------
  38Read a 3D PSI-style HDF5 file:
  39
  40>>> from psi_io import read_hdf_data
  41>>> from psi_io.data import get_3d_data
  42>>> filepath = get_3d_data()
  43>>> data, r, t, p = read_hdf_data(filepath)
  44>>> data.shape
  45(181, 100, 151)
  46"""
  47
  48from __future__ import annotations
  49
  50__all__ = [
  51    "read_hdf_meta",
  52    "read_rtp_meta",
  53
  54    "get_scales_1d",
  55    "get_scales_2d",
  56    "get_scales_3d",
  57
  58    "read_hdf_by_index",
  59    "read_hdf_by_value",
  60    "read_hdf_by_ivalue",
  61
  62    "np_interpolate_slice_from_hdf",
  63    "sp_interpolate_slice_from_hdf",
  64    "interpolate_positions_from_hdf",
  65
  66    "instantiate_linear_interpolator",
  67    "interpolate_point_from_1d_slice",
  68    "interpolate_point_from_2d_slice",
  69
  70    "read_hdf_data",
  71    "rdhdf_1d",
  72    "rdhdf_2d",
  73    "rdhdf_3d",
  74
  75    "write_hdf_data",
  76    "wrhdf_1d",
  77    "wrhdf_2d",
  78    "wrhdf_3d",
  79
  80    "convert",
  81    "convert_psih4_to_psih5"
  82]
  83
  84import math
  85from collections import namedtuple
  86from pathlib import Path
  87from types import MappingProxyType
  88from typing import Optional, Literal, Tuple, Sequence, List, Dict, Union, Callable, Any
  89
  90import numpy as np
  91import h5py as h5
  92
  93# -----------------------------------------------------------------------------
  94# Optional Imports and Import Checking
  95# -----------------------------------------------------------------------------
  96# These packages are needed by several functions and must be imported in the
  97# module namespace.
# pyhdf (HDF4 support) is optional: record availability at import time
# instead of failing, so HDF5-only installations still work.  HDF4 entry
# points later fail fast via _except_no_pyhdf().
try:
    import pyhdf.SD as h4
    H4_AVAILABLE = True
    # Maps numpy ``dtype.kind`` -> HDF4 SDC type constant.  Numeric kinds
    # ("i", "u", "f", "b") use a nested ``itemsize`` lookup; string kinds
    # ("U", "S") map directly.  MappingProxyType makes the table read-only.
    DTYPE_TO_SDC = MappingProxyType({
        "i": {
            1: h4.SDC.INT8,
            2: h4.SDC.INT16,
            4: h4.SDC.INT32,
        },
        "u": {
            1: h4.SDC.UINT8,
            2: h4.SDC.UINT16,
            4: h4.SDC.UINT32,
        },
        "f": {
            4: h4.SDC.FLOAT32,
            8: h4.SDC.FLOAT64,
        },
        "b": {
            1: h4.SDC.UINT8
        },
        "U": h4.SDC.CHAR,
        "S": h4.SDC.UCHAR
    })
    """
        Helper dictionary mapping :class:`~numpy.dtype` kinds to HDF4 SDC types.

        The keys are :attr:`~numpy.dtype.kind`, and the values are either a direct
        mapping (for byte-string or unicode-string types) or a nested mapping of
        :attr:`~numpy.dtype.itemsize` to SDC type (for numeric types).
    """
except ImportError:
    # pyhdf missing: leave an empty table so lookups fail loudly if reached.
    H4_AVAILABLE = False
    DTYPE_TO_SDC = {}

# scipy is optional and only required by the interpolation routines,
# which guard themselves with _except_no_scipy().
try:
    from scipy.interpolate import RegularGridInterpolator
    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False
 138
 139
 140# Functions to stop execution if a package doesn't exist.
 141def _except_no_pyhdf():
 142    """Raise :exc:`ImportError` if the ``pyhdf`` package is not available.
 143
 144    Raises
 145    ------
 146    ImportError
 147        If ``pyhdf`` was not importable at module load time.
 148
 149    Examples
 150    --------
 151    >>> from psi_io.psi_io import _except_no_pyhdf, H4_AVAILABLE
 152    >>> H4_AVAILABLE  # doctest: +SKIP
 153    True
 154    >>> _except_no_pyhdf() is None  # no-op when pyhdf is installed
 155    True
 156    """
 157    if not H4_AVAILABLE:
 158        raise ImportError('The pyhdf package is required to read/write HDF4 .hdf files!')
 159    return
 160
 161
 162def _except_no_scipy():
 163    """Raise :exc:`ImportError` if the ``scipy`` package is not available.
 164
 165    Raises
 166    ------
 167    ImportError
 168        If ``scipy`` was not importable at module load time.
 169
 170    Examples
 171    --------
 172    >>> from psi_io.psi_io import _except_no_scipy, SCIPY_AVAILABLE
 173    >>> SCIPY_AVAILABLE  # doctest: +SKIP
 174    True
 175    >>> _except_no_scipy() is None  # no-op when scipy is installed
 176    True
 177    """
 178    if not SCIPY_AVAILABLE:
 179        raise ImportError('The scipy package is required for the interpolation routines!')
 180    return
 181
 182
# Reverse lookup: HDF4 SDS numeric type code -> numpy dtype.  The integer
# keys are the SDC constants reported by pyhdf (e.g. 5 == SDC.FLOAT32).
SDC_TYPE_CONVERSIONS = MappingProxyType({
    3: np.dtype("ubyte"),
    4: np.dtype("byte"),
    5: np.dtype("float32"),
    6: np.dtype("float64"),
    20: np.dtype("int8"),
    21: np.dtype("uint8"),
    22: np.dtype("int16"),
    23: np.dtype("uint16"),
    24: np.dtype("int32"),
    25: np.dtype("uint32")
})
"""Helper dictionary for mapping HDF4 types to numpy dtypes"""


# Default dataset name used by PSI tools, keyed by format tag.
PSI_DATA_ID = MappingProxyType({
    'h4': 'Data-Set-2',
    'h5': 'Data'
})
"""Mapping of PSI standard dataset names for HDF4 and HDF5 files"""


# Default per-dimension scale names, keyed by format tag (index 0 is the
# first on-disk dimension).
PSI_SCALE_ID = MappingProxyType({
    'h4': ('fakeDim0', 'fakeDim1', 'fakeDim2'),
    'h5': ('dim1', 'dim2', 'dim3')
})
"""Mapping of PSI standard scale names for HDF4 and HDF5 files"""


HDFEXT = {'.hdf', '.h5'}
"""Set of possible HDF file extensions"""


HdfExtType = Literal['.hdf', '.h5']
"""Type alias for possible HDF file extensions"""


HdfScaleMeta = namedtuple('HdfScaleMeta', ['name', 'type', 'shape', 'imin', 'imax'])
"""
    Named tuple storing metadata for a single HDF scale (coordinate) dimension.

    Parameters
    ----------
    name : str
        The name of the scale dataset.
    type : str
        The data type of the scale.
    shape : tuple[int, ...]
        The shape of the scale array.
    imin : float
        The minimum value of the scale.
        This assumes the scale is monotonically increasing.
    imax : float
        The maximum value of the scale.
        This assumes the scale is monotonically increasing.
"""


HdfDataMeta = namedtuple('HdfDataMeta', ['name', 'type', 'shape', 'attr', 'scales'])
"""
    Named tuple for HDF dataset metadata

    Parameters
    ----------
    name : str
        The name of the dataset.
    type : str
        The data type of the dataset.
    shape : tuple[int, ...]
        The shape of the dataset.
    attr : dict[str, Any]
        A dictionary of attributes associated with the dataset.
    scales : list[HdfScaleMeta]
        A list of scale metadata objects corresponding to each dimension of the dataset.
        If the dataset has no scales, this list will be empty.
"""

PathLike = Union[Path, str]
"""Type alias for file paths, accepting either :class:`pathlib.Path` or str"""
 262
 263
 264def _dtype_to_sdc(dtype: np.dtype):
 265    """Convert a numpy dtype to the corresponding HDF4 SDC type.
 266
 267    Parameters
 268    ----------
 269    dtype : np.dtype
 270        The numpy dtype to convert.
 271
 272    Returns
 273    -------
 274    out : int
 275        The corresponding HDF4 SDC type constant for the given numpy dtype.
 276
 277    Raises
 278    ------
 279    ImportError
 280        If the ``pyhdf`` package is not available.
 281    KeyError
 282        If the dtype kind or itemsize is not supported by HDF4.  See
 283        :func:`write_hdf_data` for the full dtype support table.
 284
 285    Examples
 286    --------
 287    >>> import numpy as np
 288    >>> from psi_io.psi_io import _dtype_to_sdc
 289    >>> import pyhdf.SD as h4
 290    >>> _dtype_to_sdc(np.dtype("float32")) == h4.SDC.FLOAT32
 291    True
 292    """
 293    _except_no_pyhdf()
 294    if dtype.kind in {"U", "S"}:
 295        return DTYPE_TO_SDC[dtype.kind]
 296
 297    try:
 298        return DTYPE_TO_SDC[dtype.kind][dtype.itemsize]
 299    except KeyError as e:
 300        if dtype.kind not in DTYPE_TO_SDC:
 301            msg = (f"Unsupported dtype kind '{dtype.kind}' for HDF4. "
 302                   f"Supported kinds are: {set(DTYPE_TO_SDC.keys())}")
 303            raise KeyError(msg) from e
 304        elif dtype.itemsize not in DTYPE_TO_SDC[dtype.kind]:
 305            msg = (f"Unsupported itemsize '{dtype.itemsize}' for dtype kind '{dtype.kind}' in HDF4. "
 306                   f"Supported itemsizes are: {set(DTYPE_TO_SDC[dtype.kind].keys())}")
 307            raise KeyError(msg) from e
 308        raise e
 309
 310
 311def _dispatch_by_ext(ifile: PathLike,
 312                     hdf4_func: Callable,
 313                     hdf5_func: Callable,
 314                     *args: Any, **kwargs: Any
 315                     ):
 316    """
 317    Dispatch function to call HDF4 or HDF5 specific functions based on file extension.
 318
 319    Parameters
 320    ----------
 321    ifile : PathLike
 322        The path to the HDF file.
 323    hdf4_func : Callable
 324        The function to call for HDF4 files.
 325    hdf5_func : Callable
 326        The function to call for HDF5 files.
 327    *args : Any
 328        Positional arguments to pass to the selected function.
 329    **kwargs : Any
 330        Keyword arguments to pass to the selected function.
 331
 332    Returns
 333    -------
 334    out : Any
 335        The return value of the dispatched function.
 336
 337    Raises
 338    ------
 339    ValueError
 340        If the file does not have a `.hdf` or `.h5` extension.
 341    ImportError
 342        If the file is HDF4 and the `pyhdf` package is not available.
 343
 344    Examples
 345    --------
 346    >>> from psi_io.psi_io import _dispatch_by_ext, _read_h5_data, _read_h4_data
 347    >>> from psi_io.data import get_3d_data
 348    >>> filepath = get_3d_data()
 349    >>> data, *_ = _dispatch_by_ext(filepath, _read_h4_data, _read_h5_data)
 350    >>> data.shape
 351    (181, 100, 151)
 352    """
 353    ipath = Path(ifile)
 354    if ipath.suffix == '.h5':
 355        return hdf5_func(ifile, *args, **kwargs)
 356    if ipath.suffix == '.hdf':
 357        _except_no_pyhdf()
 358        return hdf4_func(ifile, *args, **kwargs)
 359    raise ValueError("File must be HDF4 (.hdf) or HDF5 (.h5)")
 360
 361
 362# -----------------------------------------------------------------------------
 363# "Classic" HDF reading and writing routines adapted from psihdf.py or psi_io.py.
 364# -----------------------------------------------------------------------------
 365
 366
def rdhdf_1d(hdf_filename: PathLike
             ) -> Tuple[np.ndarray, np.ndarray]:
    """Read a 1D PSI-style HDF5 or HDF4 file.

    Parameters
    ----------
    hdf_filename : PathLike
        The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array.
    f : np.ndarray
        1D data array.

    See Also
    --------
    read_hdf_data : Generic HDF data reading routine.

    Examples
    --------
    >>> from psi_io import rdhdf_1d
    >>> from psi_io.data import get_1d_data
    >>> filepath = get_1d_data()
    >>> x, f = rdhdf_1d(filepath)
    >>> x.shape, f.shape
    ((151,), (151,))
    """
    # Legacy entry point: delegate to the shared n-dimensional reader.
    return _rdhdf_nd(hdf_filename, dimensionality=1)
397 398
def rdhdf_2d(hdf_filename: PathLike
             ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Read a 2D PSI-style HDF5 or HDF4 file.

    The data in the HDF file is assumed to be ordered X, Y in Fortran order,
    with a 1D scale per dimension describing the rectilinear grid coordinates.

    Parameters
    ----------
    hdf_filename : PathLike
        The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array for the X dimension.
    y : np.ndarray
        1D coordinate scale array for the Y dimension.
    f : np.ndarray
        2D data array, C-ordered with shape ``(ny, nx)`` in Python.

    Notes
    -----
    Fortran is column-major and Python is row-major, so the data array is
    returned with reversed axis order relative to how it is stored on disk:
    the first returned scale corresponds to the *slowest* varying dimension
    of the data array (its last axis).

    See Also
    --------
    read_hdf_data : Generic HDF data reading routine.

    Examples
    --------
    >>> from psi_io import rdhdf_2d
    >>> from psi_io.data import get_2d_data
    >>> filepath = get_2d_data()
    >>> x, y, f = rdhdf_2d(filepath)
    >>> f.shape == (y.shape[0], x.shape[0])
    True
    """
    # Legacy entry point: delegate to the shared n-dimensional reader.
    return _rdhdf_nd(hdf_filename, dimensionality=2)
442 443
def rdhdf_3d(hdf_filename: PathLike
             ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Read a 3D PSI-style HDF5 or HDF4 file.

    The data in the HDF file is assumed to be ordered X, Y, Z in Fortran
    order, with a 1D scale per dimension describing the rectilinear grid
    coordinates.

    Parameters
    ----------
    hdf_filename : PathLike
        The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array for the X dimension.
    y : np.ndarray
        1D coordinate scale array for the Y dimension.
    z : np.ndarray
        1D coordinate scale array for the Z dimension.
    f : np.ndarray
        3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python.

    Notes
    -----
    Fortran is column-major and Python is row-major, so the data array is
    returned with reversed axis order relative to how it is stored on disk.
    For PSI spherical datasets the returned shapes follow the convention
    ``f.shape == (n_phi, n_theta, n_r)`` with scales returned in physical
    order ``(r, theta, phi)``.

    See Also
    --------
    read_hdf_data : Generic HDF data reading routine.

    Examples
    --------
    >>> from psi_io import rdhdf_3d
    >>> from psi_io.data import get_3d_data
    >>> filepath = get_3d_data()
    >>> r, t, p, f = rdhdf_3d(filepath)
    >>> f.shape == (p.shape[0], t.shape[0], r.shape[0])
    True
    """
    # Legacy entry point: delegate to the shared n-dimensional reader.
    return _rdhdf_nd(hdf_filename, dimensionality=3)
490 491
def wrhdf_1d(hdf_filename: PathLike,
             x: np.ndarray,
             f: np.ndarray,
             **kwargs) -> None:
    """Write a 1D PSI-style HDF5 or HDF4 file.

    Parameters
    ----------
    hdf_filename : PathLike
        The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to write.
    x : np.ndarray
        1D array of scales.
    f : np.ndarray
        1D array of data.
    **kwargs
        Forwarded to :func:`~psi_io.psi_io.write_hdf_data`, specifically:

        - ``dataset_id`` : str | None
            The identifier of the dataset to write.
            If None, a default dataset is used ('Data-Set-2' for HDF4 and 'Data' for HDF5).
        - ``sync_dtype``: bool
            If True, the data type of the scales will be matched to that of the data array.

        Omitting these reproduces the legacy behavior: writing to the default
        PSI dataset IDs and synchronizing dtypes between the dataset and scales.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 files, the data or scale dtype is not supported by
        :py:mod:`pyhdf`.  See :func:`write_hdf_data` for the full dtype
        support table.

    Notes
    -----
    This routine exists for backward compatibility with existing PSI codes
    that expect this API signature.  It dispatches to
    :func:`~psi_io.psi_io.write_hdf_data`, preserving the legacy argument
    order and defaults (sync dtype, default dataset IDs).

    .. warning::
        With default arguments this routine **writes to the default PSI
        dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the
        dataset and scales.**  This behavior is required for interoperability
        with certain Fortran-based PSI tools.

    See Also
    --------
    write_hdf_data : Generic HDF data writing routine.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io import wrhdf_1d, rdhdf_1d
    >>> x = np.linspace(0.0, 1.0, 50, dtype=np.float32)
    >>> f = np.sin(x)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     wrhdf_1d(Path(d) / "out.h5", x, f)
    ...     x2, f2 = rdhdf_1d(Path(d) / "out.h5")
    ...     np.array_equal(f, f2)
    True
    """
    # Legacy entry point: the shared writer takes the data array first.
    return _wrhdf_nd(hdf_filename, f, x, dimensionality=1, **kwargs)
560 561
def wrhdf_2d(hdf_filename: PathLike,
             x: np.ndarray,
             y: np.ndarray,
             f: np.ndarray,
             **kwargs) -> None:
    """Write a 2D PSI-style HDF5 or HDF4 file.

    The data in the HDF file will appear as X,Y in Fortran order.  Each
    dimension requires a 1D "scale" describing its rectilinear grid
    coordinates.

    Parameters
    ----------
    hdf_filename : PathLike
        The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to write.
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    f : np.ndarray
        2D data array, C-ordered with shape ``(ny, nx)`` in Python.
    **kwargs
        Forwarded to :func:`~psi_io.psi_io.write_hdf_data`, specifically:

        - ``dataset_id`` : str | None
            The identifier of the dataset to write.
            If None, a default dataset is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
        - ``sync_dtype``: bool
            If True, the data type of the scales will be matched to that of the data array.

        Omitting these reproduces the legacy behavior: writing to the default
        PSI dataset IDs and synchronizing dtypes between the dataset and scales.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 files, the data or scale dtype is not supported by
        :py:mod:`pyhdf`.  See :func:`write_hdf_data` for the full dtype
        support table.

    Notes
    -----
    This routine exists for backward compatibility with existing PSI codes
    that expect this API signature.  It dispatches to
    :func:`~psi_io.psi_io.write_hdf_data`, preserving the legacy argument
    order and defaults (sync dtype, default dataset IDs).

    .. warning::
        With default arguments this routine **writes to the default PSI
        dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the
        dataset and scales.**  This behavior is required for interoperability
        with certain Fortran-based PSI tools.

    See Also
    --------
    write_hdf_data : Generic HDF data writing routine.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io import wrhdf_2d, rdhdf_2d
    >>> x = np.linspace(0.0, 2*np.pi, 64, dtype=np.float32)
    >>> y = np.linspace(0.0, np.pi, 32, dtype=np.float32)
    >>> f = np.outer(np.sin(x), np.cos(y)).astype(np.float32)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     wrhdf_2d(Path(d) / "out.h5", x, y, f)
    ...     x2, y2, f2 = rdhdf_2d(Path(d) / "out.h5")
    ...     np.array_equal(f, f2)
    True
    """
    # Legacy entry point: the shared writer takes the data array first.
    return _wrhdf_nd(hdf_filename, f, x, y, dimensionality=2, **kwargs)
639 640
def wrhdf_3d(hdf_filename: PathLike,
             x: np.ndarray,
             y: np.ndarray,
             z: np.ndarray,
             f: np.ndarray,
             **kwargs) -> None:
    """Write a 3D PSI-style HDF5 or HDF4 file.

    The data in the HDF file will appear as X,Y,Z in Fortran order.  Each
    dimension requires a 1D "scale" describing its rectilinear grid
    coordinates.

    Parameters
    ----------
    hdf_filename : PathLike
        The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to write.
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    z : np.ndarray
        1D array of scales in the Z dimension.
    f : np.ndarray
        3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python.
    **kwargs
        Forwarded to :func:`~psi_io.psi_io.write_hdf_data`, specifically:

        - ``dataset_id`` : str | None
            The identifier of the dataset to write.
            If None, a default dataset is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
        - ``sync_dtype``: bool
            If True, the data type of the scales will be matched to that of the data array.

        Omitting these reproduces the legacy behavior: writing to the default
        PSI dataset IDs and synchronizing dtypes between the dataset and scales.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 files, the data or scale dtype is not supported by
        :py:mod:`pyhdf`.  See :func:`write_hdf_data` for the full dtype
        support table.

    Notes
    -----
    This routine exists for backward compatibility with existing PSI codes
    that expect this API signature.  It dispatches to
    :func:`~psi_io.psi_io.write_hdf_data`, preserving the legacy argument
    order and defaults (sync dtype, default dataset IDs).

    .. warning::
        With default arguments this routine **writes to the default PSI
        dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the
        dataset and scales.**  This behavior is required for interoperability
        with certain Fortran-based PSI tools.

    See Also
    --------
    write_hdf_data : Generic HDF data writing routine.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io import wrhdf_3d, rdhdf_3d
    >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
    >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32)
    >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32)
    >>> f = np.ones((30, 20, 10), dtype=np.float32)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     wrhdf_3d(Path(d) / "out.h5", r, t, p, f)
    ...     r2, t2, p2, f2 = rdhdf_3d(Path(d) / "out.h5")
    ...     np.array_equal(f, f2)
    True
    """
    # Legacy entry point: the shared writer takes the data array first.
    return _wrhdf_nd(hdf_filename, f, x, y, z, dimensionality=3, **kwargs)
722 723
def get_scales_1d(filename: PathLike
                  ) -> np.ndarray:
    """Return the coordinate scale of a 1D PSI-style HDF5 or HDF4 dataset.

    Does not load the data array, so this is efficient for large files.

    Parameters
    ----------
    filename : PathLike
        The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array.

    Examples
    --------
    >>> from psi_io import get_scales_1d
    >>> from psi_io.data import get_1d_data
    >>> filepath = get_1d_data()
    >>> x = get_scales_1d(filepath)
    >>> x.ndim
    1
    """
    # Choose the HDF4/HDF5 backend from the file extension.
    return _dispatch_by_ext(filename,
                            _get_scales_nd_h4,
                            _get_scales_nd_h5,
                            dimensionality=1)
751 752
def get_scales_2d(filename: PathLike
                  ) -> Tuple[np.ndarray, np.ndarray]:
    """Return the coordinate scales of a 2D PSI-style HDF5 or HDF4 dataset.

    Does not load the data array, so this is efficient for large files.
    The data in the HDF file is assumed to be ordered X, Y in Fortran order.

    Parameters
    ----------
    filename : PathLike
        The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array for the X dimension.
    y : np.ndarray
        1D coordinate scale array for the Y dimension.

    Examples
    --------
    >>> from psi_io import get_scales_2d
    >>> from psi_io.data import get_2d_data
    >>> filepath = get_2d_data()
    >>> x, y = get_scales_2d(filepath)
    >>> x.ndim, y.ndim
    (1, 1)
    """
    # Choose the HDF4/HDF5 backend from the file extension.
    return _dispatch_by_ext(filename,
                            _get_scales_nd_h4,
                            _get_scales_nd_h5,
                            dimensionality=2)
783 784
def get_scales_3d(filename: PathLike
                  ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Return the coordinate scales of a 3D PSI-style HDF5 or HDF4 dataset.

    Does not load the data array, so this is efficient for large files.
    The data in the HDF file is assumed to be ordered X, Y, Z in Fortran order.

    Parameters
    ----------
    filename : PathLike
        The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array for the X dimension.
    y : np.ndarray
        1D coordinate scale array for the Y dimension.
    z : np.ndarray
        1D coordinate scale array for the Z dimension.

    Examples
    --------
    >>> from psi_io import get_scales_3d
    >>> from psi_io.data import get_3d_data
    >>> filepath = get_3d_data()
    >>> r, t, p = get_scales_3d(filepath)
    >>> r.ndim, t.ndim, p.ndim
    (1, 1, 1)
    """
    # Choose the HDF4/HDF5 backend from the file extension.
    return _dispatch_by_ext(filename,
                            _get_scales_nd_h4,
                            _get_scales_nd_h5,
                            dimensionality=3)
817 818 819# ----------------------------------------------------------------------------- 820# "Updated" HDF reading and slicing routines for Hdf4 and Hdf5 datasets. 821# ----------------------------------------------------------------------------- 822 823
def read_hdf_meta(ifile: PathLike, /,
                  dataset_id: Optional[str] = None
                  ) -> List[HdfDataMeta]:
    """
    Read metadata from an HDF4 (.hdf) or HDF5 (.h5) file.

    This function provides a unified interface to read metadata from both
    HDF4 and HDF5 files.

    .. warning::
        Unlike elsewhere in this module, the scales and datasets are read
        **as is**, *i.e.* without reordering scales to match PSI's Fortran
        data ecosystem.

    .. warning::
        Unlike elsewhere in this module, when ``None`` is passed to
        ``dataset_id``, all (non-scale) datasets are returned (instead of the
        default psi datasets *e.g.* 'Data-Set-2' or 'Data').  This will,
        effectively, return the standard PSI datasets when reading PSI-style
        HDF files.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    dataset_id : str | None, optional
        The identifier of the dataset for which to read metadata.
        If ``None``, metadata for **all** datasets is returned.
        Default is ``None``.

    Returns
    -------
    out : list[HdfDataMeta]
        A list of metadata objects corresponding to the specified datasets.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.

    Notes
    -----
    Delegates to :func:`_read_h5_meta` for HDF5 files and
    :func:`_read_h4_meta` for HDF4 files based on the file extension.

    Although designed for dataset objects, metadata for coordinate variables
    (scales) can be read by passing their names to ``dataset_id``, *e.g.*
    ``'dim1'``, ``'dim2'``, etc.  This is not the intended use case.

    Examples
    --------
    >>> from psi_io import read_hdf_meta
    >>> from psi_io.data import get_3d_data
    >>> filepath = get_3d_data()
    >>> meta = read_hdf_meta(filepath)
    >>> meta[0].name
    'Data'
    >>> meta[0].shape
    (181, 100, 151)
    """
    # Choose the HDF4/HDF5 backend from the file extension.
    return _dispatch_by_ext(ifile,
                            _read_h4_meta,
                            _read_h5_meta,
                            dataset_id=dataset_id)
883 884
def read_rtp_meta(ifile: PathLike, /) -> Dict:
    """
    Read the scale metadata for PSI's 3D cubes.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.

    Returns
    -------
    out : dict
        A dictionary containing the RTP metadata.
        The value for each key ('r', 't', and 'p') is a tuple containing:

        1. The scale length
        2. The scale's value at the first index
        3. The scale's value at the last index

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.

    Notes
    -----
    Delegates to :func:`_read_h5_rtp` for HDF5 files and
    :func:`_read_h4_rtp` for HDF4 files based on the file extension.

    Examples
    --------
    >>> from psi_io import read_rtp_meta
    >>> from psi_io.data import get_3d_data
    >>> filepath = get_3d_data()
    >>> meta = read_rtp_meta(filepath)
    >>> sorted(meta.keys())
    ['p', 'r', 't']
    >>> len(meta['r'])
    3
    """
    # Choose the HDF4/HDF5 backend from the file extension.
    return _dispatch_by_ext(ifile, _read_h4_rtp, _read_h5_rtp)
926 927
def read_hdf_data(ifile: PathLike, /,
                  dataset_id: Optional[str] = None,
                  return_scales: bool = True,
                  ) -> Union[np.ndarray, Tuple[np.ndarray, ...]]:
    # NOTE(review): return annotation corrected from ``Tuple[np.ndarray]``
    # (a 1-tuple) — the function returns a bare array when
    # ``return_scales=False`` and a variable-length tuple otherwise, matching
    # the annotation style of read_hdf_by_index/read_hdf_by_value.
    """
    Read data from an HDF4 (.hdf) or HDF5 (.h5) file.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    dataset_id : str | None, optional
        The identifier of the dataset to read.
        If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and
        ``'Data'`` for HDF5). Default is ``None``.
    return_scales : bool, optional
        If ``True``, the coordinate scale arrays for each dimension are also
        returned. Default is ``True``.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray, ...]
        The data array. If ``return_scales`` is ``True``, returns a tuple
        ``(data, scale_0, scale_1, ...)`` with one scale per dimension.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.

    See Also
    --------
    read_hdf_by_index : Read HDF datasets by index.
    read_hdf_by_value : Read HDF datasets by value ranges.
    read_hdf_by_ivalue : Read HDF datasets by subindex values.

    Notes
    -----
    This function delegates to :func:`_read_h5_data` for HDF5 files and
    :func:`_read_h4_data` for HDF4 files based on the file extension.

    Examples
    --------
    >>> from psi_io import read_hdf_data
    >>> from psi_io.data import get_3d_data
    >>> filepath = get_3d_data()
    >>> data, r, t, p = read_hdf_data(filepath)
    >>> data.shape
    (181, 100, 151)
    >>> r.shape, t.shape, p.shape
    ((151,), (100,), (181,))
    """
    return _dispatch_by_ext(ifile, _read_h4_data, _read_h5_data,
                            dataset_id=dataset_id, return_scales=return_scales)
982 983
def read_hdf_by_index(ifile: PathLike, /,
                      *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray, ...]]:
    # NOTE(review): return annotation corrected from
    # ``Union[np.ndarray, Tuple[np.ndarray]]`` — ``Tuple[np.ndarray]`` means a
    # 1-tuple, but the scales-included return is a variable-length tuple.
    r"""
    Read data from an HDF4 (.hdf) or HDF5 (.h5) file by index.

    .. attention::
        For each dimension, the *minimum* number of elements returned is 1 *e.g.*
        if 3 ints are passed (as positional `*xi` arguments) for a 3D dataset,
        the resulting subset will have a shape of (1, 1, 1,) with scales of length 1.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    *xi : int | tuple[int | None, int | None] | None
        Indices or ranges for each dimension of the `n`-dimensional dataset.
        Use None for a dimension to select all indices. If no arguments are passed,
        the entire dataset (and its scales) will be returned – see
        :func:`~psi_io.psi_io.read_hdf_data`.
    dataset_id : str | None, optional
        The identifier of the dataset to read.
        If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and
        ``'Data'`` for HDF5). Default is ``None``.
    return_scales : bool, optional
        If ``True``, the coordinate scale arrays for each dimension are also
        returned. Default is ``True``.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray, ...]
        The selected data array. If ``return_scales`` is ``True``, returns a
        tuple ``(data, scale_0, scale_1, ...)`` with one scale per dimension.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.

    See Also
    --------
    read_hdf_by_value : Read HDF datasets by value ranges.
    read_hdf_by_ivalue : Read HDF datasets by subindex values.
    read_hdf_data : Read entire HDF datasets.

    Notes
    -----
    This function delegates to :func:`_read_h5_by_index` for HDF5 files and
    :func:`_read_h4_by_index` for HDF4 files based on the file extension.

    This function assumes Fortran (column-major) ordering for compatibility with
    PSI's data ecosystem. For an :math:`n`-dimensional array of shape
    :math:`(D_0, D_1, \ldots, D_{n-1})` the scales satisfy
    :math:`|x_i| = |D_{(n-1)-i}|`. For example, a 3D dataset of shape
    :math:`(D_\phi, D_\theta, D_r)` has scales :math:`r, \theta, \phi`.

    Each ``*xi`` argument is forwarded to Python's built-in :class:`slice` to
    extract the desired subset without reading the entire dataset into memory.

    Examples
    --------
    Import a 3D HDF5 cube.

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_index
    >>> filepath = get_3d_data()

    Extract a radial slice at the first radial index from a 3D cube:

    >>> f, r, t, p = read_hdf_by_index(filepath, 0, None, None)
    >>> f.shape, r.shape, t.shape, p.shape
    ((181, 100, 1), (1,), (100,), (181,))

    Extract a phi slice at the 90th index from a 3D cube:

    >>> f, r, t, p = read_hdf_by_index(filepath, None, None, 90)
    >>> f.shape, r.shape, t.shape, p.shape
    ((1, 100, 151), (151,), (100,), (1,))

    Extract up to the 20th radial index with phi indices 10 to 25:

    >>> f, r, t, p = read_hdf_by_index(filepath, (None, 20), None, (10, 25))
    >>> f.shape, r.shape, t.shape, p.shape
    ((15, 100, 20), (20,), (100,), (15,))
    """
    # No selectors: equivalent to reading the entire dataset.
    if not xi:
        return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
    return _dispatch_by_ext(ifile, _read_h4_by_index, _read_h5_by_index,
                            *xi, dataset_id=dataset_id, return_scales=return_scales)
1075 1076
def read_hdf_by_value(ifile: PathLike, /,
                      *xi: Union[float, Tuple[float, float], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    r"""
    Read a subset of an HDF4 (.hdf) or HDF5 (.h5) dataset selected by value.

    .. note::
        Every dimension yields at least two elements: passing a float for
        each dimension of a 3D dataset produces a ``(2, 2, 2)`` subset with
        length-2 scales.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    *xi : float | tuple[float, float] | None
        One value (or ``(lo, hi)`` value range) per dimension of the dataset
        named by ``dataset_id``. ``None`` selects the entire dimension. With
        no positional arguments the whole dataset (and its scales) is
        returned.
    dataset_id : str | None, optional
        The identifier of the dataset to read. ``None`` selects the PSI
        default (``'Data-Set-2'`` for HDF4, ``'Data'`` for HDF5).
        Default is ``None``.
    return_scales : bool, optional
        If ``True`` (default), the per-dimension coordinate scale arrays are
        returned alongside the data.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray, ...]
        The selected data array, optionally followed by one scale per
        dimension: ``(data, scale_0, scale_1, ...)``.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.

    See Also
    --------
    read_hdf_by_index : Read HDF datasets by index.
    read_hdf_by_ivalue : Read HDF datasets by subindex values.
    read_hdf_data : Read entire HDF datasets.
    sp_interpolate_slice_from_hdf : Interpolate slices using SciPy's
        :class:`~scipy.interpolate.RegularGridInterpolator`.
    np_interpolate_slice_from_hdf : Linear/bilinear/trilinear interpolation
        using vectorized NumPy routines.

    Notes
    -----
    Dispatches to :func:`_read_h4_by_value` or :func:`_read_h5_by_value`
    based on the file extension.

    PSI datasets are Fortran (column-major) ordered: an array of shape
    :math:`(D_0, D_1, \ldots, D_{n-1})` carries scales
    :math:`(x_0, x_1, \ldots, x_{n-1})` with :math:`|x_i| = |D_{(n-1)-i}|`,
    *e.g.* a 3D cube of shape :math:`(D_p, D_t, D_r)` has scales
    :math:`r, t, p` (radial, theta, phi).

    Only the bracketing subset is read from disk. For a scale :math:`x_j`:

    - a single float :math:`a` returns the 2-element subset :math:`xʹ_j` with
      :math:`xʹ_j[0] <= a < xʹ_j[1]`;
    - a ``(lo, hi)`` tuple :math:`(a_0, a_1)` returns the *m*-element subset
      with :math:`xʹ_j[0] <= a_0` and :math:`xʹ_j[m-1] > a_1`;
    - ``None`` returns the entire scale :math:`x_j`.

    The returned subset is suitable input for a linear interpolation routine
    that extracts a "slice" at the desired fixed coordinates.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_value
    >>> filepath = get_3d_data()

    Extract a radial slice at r=15:

    >>> f, r, t, p = read_hdf_by_value(filepath, 15, None, None)
    >>> f.shape, r.shape, t.shape, p.shape
    ((181, 100, 2), (2,), (100,), (181,))

    Extract a phi slice at p=1.57:

    >>> f, r, t, p = read_hdf_by_value(filepath, None, None, 1.57)
    >>> f.shape, r.shape, t.shape, p.shape
    ((2, 100, 151), (151,), (100,), (2,))

    Extract radial values between 3.2 and 6.4 with phi equal to 4.5:

    >>> f, r, t, p = read_hdf_by_value(filepath, (3.2, 6.4), None, 4.5)
    >>> f.shape, r.shape, t.shape, p.shape
    ((2, 100, 15), (15,), (100,), (2,))
    """
    # No selection given: fall back to a full read.
    if xi:
        return _dispatch_by_ext(ifile, _read_h4_by_value, _read_h5_by_value,
                                *xi, dataset_id=dataset_id, return_scales=return_scales)
    return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1184 1185
def read_hdf_by_ivalue(ifile: PathLike, /,
                       *xi: Union[float, Tuple[float, float], None],
                       dataset_id: Optional[str] = None,
                       return_scales: bool = True,
                       ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    r"""
    Read a subset of an HDF4 (.hdf) or HDF5 (.h5) dataset by subindex value.

    Unlike :func:`read_hdf_by_value` (which works in physical coordinate
    space), this function accepts fractional *index* positions and returns
    the minimal bracketing subset required for interpolation.

    .. note::
        Every dimension yields at least two elements: passing a scalar float
        for each dimension of a 3D dataset produces a ``(2, 2, 2)`` subset
        with length-2 index arrays.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    *xi : float | tuple[float, float] | None
        One fractional index (or index range) per dataset dimension.
        ``None`` selects the entire dimension. With no positional arguments
        the full dataset is returned.
    dataset_id : str | None, optional
        The identifier of the dataset to read. ``None`` selects the PSI
        default (``'Data-Set-2'`` for HDF4, ``'Data'`` for HDF5).
        Default is ``None``.
    return_scales : bool, optional
        If ``True`` (default), 0-based index arrays (built with
        :func:`~numpy.arange`) are returned for each dimension alongside the
        data. These are always index-space arrays, regardless of whether the
        dataset has physical coordinate scales.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray, ...]
        The selected data array, optionally followed by one index array per
        dimension: ``(data, idx_0, idx_1, ...)``.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.

    See Also
    --------
    read_hdf_by_index : Read HDF datasets by integer index.
    read_hdf_by_value : Read HDF datasets by physical coordinate value.
    read_hdf_data : Read entire HDF datasets.
    sp_interpolate_slice_from_hdf : Interpolate slices using SciPy's
        :class:`~scipy.interpolate.RegularGridInterpolator`.
    np_interpolate_slice_from_hdf : Linear/bilinear/trilinear interpolation
        using vectorized NumPy routines.

    Notes
    -----
    Dispatches to :func:`_read_h4_by_ivalue` or :func:`_read_h5_by_ivalue`
    based on the file extension.

    For a dimension with scale :math:`x_j`, the bracketing rule is:

    - *single float* :math:`a` → returns
      :math:`x_j[\lfloor a \rfloor], x_j[\lceil a \rceil]`;
    - *(float, float)* :math:`(a_0, a_1)` → returns
      :math:`x_j[\lfloor a_0 \rfloor], \ldots, x_j[\lceil a_1 \rceil]`;
    - ``None`` → returns the entire scale :math:`x_j`.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_ivalue
    >>> filepath = get_3d_data()

    Extract a 2-element bracket around fractional radial index 2.7:

    >>> f, r_idx, t_idx, p_idx = read_hdf_by_ivalue(filepath, 2.7, None, None)
    >>> r_idx.shape
    (2,)
    """
    # No selection given: fall back to a full read.
    if xi:
        return _dispatch_by_ext(ifile, _read_h4_by_ivalue, _read_h5_by_ivalue,
                                *xi, dataset_id=dataset_id, return_scales=return_scales)
    return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1271 1272
def write_hdf_data(ifile: PathLike, /,
                   data: np.ndarray,
                   *scales: Sequence[Union[np.ndarray, None]],
                   dataset_id: Optional[str] = None,
                   sync_dtype: bool = False,
                   strict: bool = True,
                   **kwargs
                   ) -> Path:
    """
    Write data to an HDF4 (.hdf) or HDF5 (.h5) file.

    Per PSI convention the data array is Fortran-ordered, with scales
    supplied in dimension order, *e.g.* a 3D dataset of shape
    ``(Dp, Dt, Dr)`` takes scales ``r, t, p`` corresponding to the radial,
    theta, and phi dimensions respectively.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to write.
    data : np.ndarray
        The data array to write.
    *scales : Sequence[np.ndarray | None]
        Coordinate arrays, one per dimension. Fewer scales than dimensions
        is allowed; pass ``None`` for dimensions that should not carry a
        scale. With no scales at all, the dataset is written without
        coordinate variables.
    dataset_id : str | None, optional
        The identifier of the dataset to write. ``None`` selects the PSI
        default (``'Data-Set-2'`` for HDF4, ``'Data'`` for HDF5).
        Default is ``None``.
    sync_dtype : bool, optional
        If ``True``, cast the scale dtypes to the data array dtype. Mimics
        PSI's legacy HDF writers so Fortran tools that require uniform
        precision between datasets and scales stay compatible.
        Default is ``False``.
    strict : bool, optional
        If ``True`` (default), raise when a dataset attribute cannot be
        written to the target format; otherwise print a warning and skip
        the attribute.
    **kwargs
        Key-value pairs of dataset attributes to attach to the dataset.

    Returns
    -------
    out : Path
        The path to the written HDF file.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 files, the data or scale dtype is not supported by
        :py:mod:`pyhdf` (see Notes).

    Notes
    -----
    Dispatches to :func:`_write_h4_data` or :func:`_write_h5_data` based on
    the file extension.

    HDF5 output (via :mod:`h5py`) accepts all common NumPy dtypes,
    including ``float16``, ``int64``, ``uint64``, and complex types. HDF4
    support is determined by the :data:`DTYPE_TO_SDC` mapping:
    ``float32``/``float64``, ``int8``/``int16``/``int32``,
    ``uint8``/``uint16``/``uint32``, and ``bool`` (stored as ``uint8``) are
    supported; ``float16``, ``int64``, ``uint64``, ``complex64``, and
    ``complex128`` are not.

    See Also
    --------
    wrhdf_1d : Write 1D HDF files.
    wrhdf_2d : Write 2D HDF files.
    wrhdf_3d : Write 3D HDF files.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io import write_hdf_data, read_hdf_data
    >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
    >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32)
    >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32)
    >>> f = np.ones((30, 20, 10), dtype=np.float32)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     write_hdf_data(Path(d) / "out.h5", f, r, t, p)
    ...     data, r2, t2, p2 = read_hdf_data(Path(d) / "out.h5")
    ...     data.shape
    (30, 20, 10)
    """
    # Extension decides the backend: .hdf -> HDF4 writer, .h5 -> HDF5 writer.
    return _dispatch_by_ext(ifile, _write_h4_data, _write_h5_data, data, *scales,
                            dataset_id=dataset_id, sync_dtype=sync_dtype,
                            strict=strict, **kwargs)
1412 1413
def convert(ifile: PathLike,
            ofile: Optional[PathLike] = None,
            strict: bool = True) -> Path:
    """
    Convert an HDF file between HDF4 (.hdf) and HDF5 (.h5) formats.

    Every dataset and its attributes are preserved in the output file. The
    output format is inferred from the input (`.hdf` → `.h5` and vice versa)
    unless an explicit output path is provided.

    Parameters
    ----------
    ifile : PathLike
        The path to the source HDF file.
    ofile : PathLike, optional
        The path to the output HDF file. If ``None``, the output is written
        alongside the input with the extension swapped
        (*e.g.* ``foo.hdf`` → ``foo.h5``).
    strict : bool, optional
        If ``True`` (default), raise when a dataset attribute cannot be
        written to the output format; otherwise print a warning and skip
        the attribute.

    Returns
    -------
    out : Path
        The path to the written output file.

    Raises
    ------
    ValueError
        If the input file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 output, a data or attribute dtype is unsupported by
        :py:mod:`pyhdf` and ``strict`` is ``True``. See
        :func:`write_hdf_data` for dtype support details.
    TypeError
        If, for HDF5 output, a dataset attribute cannot be stored as an
        HDF5 attribute and ``strict`` is ``True``.

    See Also
    --------
    convert_psih4_to_psih5 : Specialized PSI-convention HDF4 → HDF5 converter.
    write_hdf_data : Generic HDF data writing routine.
    read_hdf_data : Generic HDF data reading routine.

    Examples
    --------
    >>> import tempfile
    >>> from pathlib import Path
    >>> from psi_io import convert, read_hdf_meta
    >>> from psi_io.data import get_3d_data
    >>> h5_path = get_3d_data(hdf=".h5")
    >>> with tempfile.TemporaryDirectory() as d:
    ...     out = convert(h5_path, Path(d) / "br.hdf")
    ...     meta = read_hdf_meta(out)
    ...     meta[0].name
    'Data'
    """
    ifile = Path(ifile)
    if ofile:
        ofile = Path(ofile)
    else:
        # Swap the extension: .h5 -> .hdf, anything else -> .h5.
        target_suffix = ".hdf" if ifile.suffix == ".h5" else ".h5"
        ofile = ifile.with_suffix(target_suffix)
    ofile.parent.mkdir(parents=True, exist_ok=True)

    # Copy every dataset (with its scales and attributes) into the new format.
    for dataset in read_hdf_meta(ifile):
        payload, *coords = read_hdf_data(ifile, dataset_id=dataset.name, return_scales=True)
        write_hdf_data(ofile, payload, *coords,
                       dataset_id=dataset.name, strict=strict, **dataset.attr)
    return ofile
1486 1487
def convert_psih4_to_psih5(ifile: PathLike,
                           ofile: Optional[PathLike] = None) -> Path:
    """
    Convert a PSI-convention HDF4 file to an HDF5 file.

    Unlike :func:`convert`, this function is specialized for PSI-style HDF4
    files: it reads the primary dataset (``'Data-Set-2'``) and writes it
    under the PSI HDF5 dataset name (``'Data'``), preserving the dataset's
    attributes and scales.

    Parameters
    ----------
    ifile : PathLike
        The path to the source HDF4 (.hdf) file.
    ofile : PathLike, optional
        The path to the output HDF5 (.h5) file. If ``None``, the output is
        written alongside the input with a ``.h5`` extension
        (*e.g.* ``foo.hdf`` → ``foo.h5``).

    Returns
    -------
    out : Path
        The path to the written HDF5 file.

    Raises
    ------
    ValueError
        If ``ifile`` does not have a ``.hdf`` extension, or ``ofile`` does
        not have a ``.h5`` extension.
    ImportError
        If the :py:mod:`pyhdf` package is not available.

    See Also
    --------
    convert : General-purpose HDF4 ↔ HDF5 converter.
    write_hdf_data : Generic HDF data writing routine.

    Examples
    --------
    >>> import tempfile
    >>> from pathlib import Path
    >>> from psi_io import convert_psih4_to_psih5, read_hdf_meta
    >>> from psi_io.data import get_3d_data
    >>> hdf4_path = get_3d_data(hdf=".hdf")
    >>> with tempfile.TemporaryDirectory() as d:
    ...     out = convert_psih4_to_psih5(hdf4_path, Path(d) / "br.h5")
    ...     meta = read_hdf_meta(out)
    ...     meta[0].name
    'Data'
    """
    ifile = Path(ifile)
    ofile = Path(ofile) if ofile else ifile.with_suffix(".h5")
    # Validate both extensions before touching the filesystem.
    if ifile.suffix != ".hdf":
        raise ValueError(f"Input file must have a .hdf extension; got {ifile.suffix}")
    if ofile.suffix != ".h5":
        raise ValueError(f"Output file must have a .h5 extension; got {ofile.suffix}")
    ofile.parent.mkdir(parents=True, exist_ok=True)

    # Read the PSI HDF4 primary dataset and re-write it under the HDF5 name.
    data, *scales = read_hdf_data(ifile, dataset_id=PSI_DATA_ID["h4"], return_scales=True)
    meta, *_ = read_hdf_meta(ifile, dataset_id=PSI_DATA_ID["h4"])
    write_hdf_data(ofile, data, *scales,
                   dataset_id=PSI_DATA_ID["h5"], **meta.attr)
    return ofile
1553
def instantiate_linear_interpolator(*args, **kwargs):
    r"""
    Build a linear interpolator over the provided data and scales.

    Parameters
    ----------
    *args : sequence[array_like]
        The data array first, followed by the scale (coordinate) arrays for
        each dimension.
    **kwargs : dict
        Extra keyword arguments forwarded to
        :class:`~scipy.interpolate.RegularGridInterpolator`.

    Returns
    -------
    out : RegularGridInterpolator
        A :class:`~scipy.interpolate.RegularGridInterpolator` initialized
        with the provided data and scales.

    Raises
    ------
    ImportError
        If the ``scipy`` package is not available.

    Notes
    -----
    The data array is transposed before being handed to
    :class:`~scipy.interpolate.RegularGridInterpolator`, so a PSI-style
    Fortran-ordered 3D dataset yields an interpolator queried with
    :math:`(r, \theta, \phi)` coordinates.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_value
    >>> from numpy import pi
    >>> filepath = get_3d_data()

    Read the dataset by value (at 15 R_sun in the radial dimension):

    >>> data_and_scales = read_hdf_by_value(filepath, 15, None, None)
    >>> interpolator = instantiate_linear_interpolator(*data_and_scales)

    Interpolate at a specific position:

    >>> interpolator((15, pi/2, pi))
    0.0012864485109423877
    """
    _except_no_scipy()
    data, *coords = args
    # Transpose so the interpolator is indexed in scale order (r, theta, phi).
    return RegularGridInterpolator(points=coords, values=data.T, **kwargs)
1609 1610
def sp_interpolate_slice_from_hdf(*xi, **kwargs):
    r"""
    Interpolate a slice from HDF data using SciPy's `RegularGridInterpolator`.

    .. note::
        Slicing reduces dimensionality: dimensions fixed with float values
        in `*xi` are removed from the output slice, while dimensions given
        as ``None`` are retained.

    Parameters
    ----------
    *xi : sequence
        The file path followed by the per-dimension arguments, all
        passed-through to :func:`read_hdf_by_value`.
    **kwargs : dict
        Keyword arguments passed-through to :func:`read_hdf_by_value`.
        **NOTE: Building the interpolator requires scales, so any provided**
        ``return_scales`` **value is overridden to ensure scales are read.**

    Returns
    -------
    data_slice : np.ndarray
        The interpolated data slice with fixed dimensions removed.
    scales : list[np.ndarray]
        Coordinate scale arrays for the retained (non-fixed) dimensions.

    Notes
    -----
    The data is read from the HDF file, a
    :class:`~scipy.interpolate.RegularGridInterpolator` is built, and the
    interpolator is evaluated at the requested fixed coordinates.

    .. note::
        The returned slice is Fortran-ordered, *e.g.* radial slices have
        shape :math:`(n_\phi, n_\theta)` and :math:`\phi` slices have shape
        :math:`(n_r, n_\theta)`.

    .. note::
        :class:`~scipy.interpolate.RegularGridInterpolator` casts input data
        to ``float64`` internally, so ``float32`` PSI datasets are upcast
        during interpolation.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import sp_interpolate_slice_from_hdf
    >>> from numpy import pi
    >>> filepath = get_3d_data()

    Fetch a 2D slice at r=15 from 3D map

    >>> slice_, theta_scale, phi_scale = sp_interpolate_slice_from_hdf(filepath, 15, None, None)
    >>> slice_.shape, theta_scale.shape, phi_scale.shape
    ((181, 100), (100,), (181,))

    Fetch a single point from 3D map

    >>> point_value, *_ = sp_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
    >>> point_value
    6.084495480971823
    """
    ifile, *coords = xi
    # Scales are mandatory for the interpolator; drop any caller override.
    kwargs.pop('return_scales', None)
    data_and_scales = read_hdf_by_value(ifile, *coords, **kwargs)
    scales = data_and_scales[1:]
    interp = instantiate_linear_interpolator(*data_and_scales)
    # Fixed dimensions contribute their single value; free ones the full scale.
    axes = [scale if fixed is None else fixed
            for fixed, scale in zip(coords, scales)]
    evaluated = interp(tuple(np.meshgrid(*axes, indexing='ij')))
    # Squeeze the fixed dimensions (length-1 axes) out of the result.
    selector = tuple(slice(None, None) if fixed is None else 0 for fixed in coords)
    kept = [scale for fixed, scale in zip(coords, scales) if fixed is None]
    return evaluated[selector].T, *kept
1681 1682
def np_interpolate_slice_from_hdf(ifile: PathLike, /,
                                  *xi: Union[float, Tuple[float, float], None],
                                  dataset_id: Optional[str] = None,
                                  by_index: bool = False,
                                  ):
    """
    Interpolate a slice from HDF data using linear interpolation.

    .. note::
        Slicing routines result in a dimensional reduction. The dimensions
        that are fixed (i.e. provided as float values in `*xi`) are removed
        from the output slice, while the dimensions that are not fixed
        (*i.e.* provided as `None` in `*xi`) are retained.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    *xi : sequence
        Positional arguments passed-through to reader function.
    dataset_id : str | None, optional
        The identifier of the dataset to read. If ``None``, a default dataset
        is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
        Default is ``None``.
    by_index : bool, optional
        If ``True``, use :func:`read_hdf_by_ivalue` to read data by fractional
        index values. If ``False``, use :func:`read_hdf_by_value` to read by
        physical coordinate values. Default is ``False``.

    Returns
    -------
    data_slice : np.ndarray
        The interpolated data slice with fixed dimensions removed.
    scales : list[np.ndarray]
        Coordinate scale arrays for the retained (non-fixed) dimensions.

    Raises
    ------
    ValueError
        If the number of dimensions to interpolate over is not supported.

    Notes
    -----
    This function supports linear, bilinear, and trilinear interpolation
    depending on the number of dimensions fixed in `xi`.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import np_interpolate_slice_from_hdf
    >>> from numpy import pi
    >>> filepath = get_3d_data()

    Fetch a 2D slice at r=15 from 3D map

    >>> slice_, theta_scale, phi_scale = np_interpolate_slice_from_hdf(filepath, 15, None, None)
    >>> slice_.shape, theta_scale.shape, phi_scale.shape
    ((181, 100), (100,), (181,))

    Fetch a single point from 3D map

    >>> point_value, *_ = np_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
    >>> point_value
    6.084496

    """
    reader = read_hdf_by_ivalue if by_index else read_hdf_by_value
    data, *scales = reader(ifile, *xi, dataset_id=dataset_id, return_scales=True)
    f_ = np.transpose(data)
    # Select the interpolation routine by the number of fixed dimensions;
    # a dispatch table avoids repeating the retained-scales expression per branch.
    interpolators = {
        1: _np_linear_interpolation,
        2: _np_bilinear_interpolation,
        3: _np_trilinear_interpolation,
    }
    n_fixed = sum(yi is not None for yi in xi)
    if n_fixed not in interpolators:
        raise ValueError("Not a valid number of dimensions for supported linear interpolation methods")
    # Scales for the dimensions that remain after slicing.
    kept_scales = [scale for fixed, scale in zip(xi, scales) if fixed is None]
    return interpolators[n_fixed](xi, scales, f_).T, *kept_scales
1761 1762
def interpolate_positions_from_hdf(ifile, *xi, **kwargs):
    r"""
    Interpolate at a list of scale positions using SciPy's
    :class:`~scipy.interpolate.RegularGridInterpolator`.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    *xi : sequence[array_like]
        Coordinate values for each dimension of the ``n``-dimensional
        dataset. Any array_like (list, tuple, ndarray) is accepted; each
        must have the same length :math:`m`. The function assembles them
        into an :math:`m \times n` column stack for interpolation.
    **kwargs
        Keyword arguments forwarded to :func:`read_hdf_by_value`.

    Returns
    -------
    out : np.ndarray
        The interpolated values at the provided positions.

    Notes
    -----
    This function reads data from an HDF file, creates a linear interpolator,
    and interpolates at the provided scale values. For each dimension, the
    minimum and maximum values from the provided arrays are used to read
    the necessary subset of data from the HDF file *viz.* to avoid loading
    the entire dataset into memory.

    Examples
    --------
    Import a 3D HDF5 cube.

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import interpolate_positions_from_hdf
    >>> import numpy as np
    >>> filepath = get_3d_data()

    Set up positions to interpolate.

    >>> r_vals = np.array([15, 20, 25])
    >>> theta_vals = np.array([np.pi/4, np.pi/2, 3*np.pi/4])
    >>> phi_vals = np.array([0, np.pi, 2*np.pi])

    Interpolate at the specified positions.

    >>> interpolate_positions_from_hdf(filepath, r_vals, theta_vals, phi_vals)
    [0.0008402743657585175, 0.000723875405654482, -0.00041033233811179216]
    """
    # Coerce to ndarray so plain lists/tuples work too (the original
    # np.stack(..., axis=len(xi[0].shape)) required ndarray inputs).
    positions = [np.asarray(p) for p in xi]
    # Read only the bounding subset of each dimension, not the whole dataset.
    bounds = [(np.nanmin(p), np.nanmax(p)) for p in positions]
    f, *scales = read_hdf_by_value(ifile, *bounds, **kwargs)
    # bounds_error=False: positions exactly on the subset edge (or NaN) yield
    # NaN instead of raising.
    interpolator = instantiate_linear_interpolator(f, *scales, bounds_error=False)
    # Stack the coordinate arrays along a new trailing axis -> (m, n) points.
    return interpolator(np.stack(positions, axis=-1))
1816 1817
def interpolate_point_from_1d_slice(xi, scalex, values):
    """
    Interpolate a point from a 1D slice using linear interpolation.

    Parameters
    ----------
    xi : float
        The coordinate value at which to interpolate.
    scalex : np.ndarray
        The coordinate scale array for the dimension.
    values : np.ndarray
        The 1D data array to interpolate.

    Returns
    -------
    out : np.ndarray
        The interpolated scalar value as a zero-dimensional array.

    Examples
    --------
    >>> import numpy as np
    >>> from psi_io.psi_io import interpolate_point_from_1d_slice
    >>> x = np.linspace(0.0, 1.0, 11)
    >>> f = x ** 2
    >>> float(interpolate_point_from_1d_slice(0.5, x, f))
    0.25
    """
    # A descending scale is flipped (with its values) so searchsorted works.
    if scalex[0] > scalex[-1]:
        scalex = scalex[::-1]
        values = values[::-1]
    idx = int(np.searchsorted(scalex, xi))
    window = slice(*_check_index_ranges(len(scalex), idx, idx))
    return _np_linear_interpolation([xi], [scalex[window]], values[window])
1850 1851
def interpolate_point_from_2d_slice(xi, yi, scalex, scaley, values):
    """
    Interpolate a point from a 2D slice using bilinear interpolation.

    Parameters
    ----------
    xi : float
        The coordinate value for the first dimension.
    yi : float
        The coordinate value for the second dimension.
    scalex : np.ndarray
        The coordinate scale array for the first dimension.
    scaley : np.ndarray
        The coordinate scale array for the second dimension.
    values : np.ndarray
        The 2D data array to interpolate, with shape ``(nx, ny)``.

    Returns
    -------
    out : np.ndarray
        The interpolated scalar value as a zero-dimensional array.

    Examples
    --------
    >>> import numpy as np
    >>> from psi_io.psi_io import interpolate_point_from_2d_slice
    >>> x = np.linspace(0.0, 1.0, 11)
    >>> y = np.linspace(0.0, 1.0, 11)
    >>> f = np.outer(x, y)
    >>> float(interpolate_point_from_2d_slice(0.5, 0.5, x, y, f))
    0.25
    """
    values = np.transpose(values)
    # Flip any descending scale (with the matching data axis) so
    # searchsorted operates on ascending coordinates.
    if scalex[0] > scalex[-1]:
        scalex = scalex[::-1]
        values = values[::-1, :]
    if scaley[0] > scaley[-1]:
        scaley = scaley[::-1]
        values = values[:, ::-1]
    ix = int(np.searchsorted(scalex, xi))
    iy = int(np.searchsorted(scaley, yi))
    sx = slice(*_check_index_ranges(len(scalex), ix, ix))
    sy = slice(*_check_index_ranges(len(scaley), iy, iy))
    return _np_bilinear_interpolation([xi, yi], [scalex[sx], scaley[sy]], values[(sx, sy)])
def _rdhdf_nd(hdf_filename: str,
              dimensionality: int
              ) -> Tuple[np.ndarray, ...]:
    """Read an n-dimensional PSI-style HDF file; shared implementation for `rdhdf_1d/2d/3d`.

    Parameters
    ----------
    hdf_filename : PathLike
        The path to the HDF5 (.h5) or HDF4 (.hdf) file to read.
    dimensionality : int
        The expected number of dimensions of the dataset.

    Returns
    -------
    out : tuple[np.ndarray, ...]
        The coordinate scales for each dimension followed by the data array.

    Raises
    ------
    ValueError
        If the dataset dimensionality does not match ``dimensionality``.

    Examples
    --------
    >>> from psi_io.psi_io import _rdhdf_nd
    >>> from psi_io.data import get_3d_data
    >>> r, t, p, f = _rdhdf_nd(get_3d_data(), dimensionality=3)
    >>> f.shape
    (181, 100, 151)
    """
    data, *coords = read_hdf_data(hdf_filename)
    if data.ndim != dimensionality:
        raise ValueError(
            f'Expected {dimensionality}D data, got {data.ndim}D data instead.')
    # A scale-less file still returns one (empty) placeholder per dimension.
    if not coords:
        coords = [np.empty(0) for _ in data.shape]
    return *coords, data
1955 1956 Raises 1957 ------ 1958 ValueError 1959 If the data dimensionality does not match ``dimensionality``. 1960 1961 Examples 1962 -------- 1963 >>> import tempfile, numpy as np 1964 >>> from pathlib import Path 1965 >>> from psi_io.psi_io import _wrhdf_nd 1966 >>> from psi_io import read_hdf_data 1967 >>> f = np.ones((10, 20, 30), dtype=np.float32) 1968 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32) 1969 >>> t = np.linspace(0.0, 3.14, 20, dtype=np.float32) 1970 >>> p = np.linspace(0.0, 6.28, 30, dtype=np.float32) 1971 >>> with tempfile.TemporaryDirectory() as d: 1972 ... _wrhdf_nd(Path(d) / "out.h5", f, r, t, p, dimensionality=3) 1973 ... data, *_ = read_hdf_data(Path(d) / "out.h5") 1974 ... data.shape 1975 (10, 20, 30) 1976 """ 1977 if data.ndim != dimensionality: 1978 err = f'Expected {dimensionality}D data, got {data.ndim}D data instead.' 1979 raise ValueError(err) 1980 write_hdf_data(hdf_filename, data, *scales, sync_dtype=sync_dtype, **kwargs) 1981 1982 1983def _get_scales_nd_h5(ifile: Union[ Path, str], /, 1984 dimensionality: int, 1985 dataset_id: Optional[str] = None, 1986 ): 1987 """HDF5 (.h5) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`. 1988 1989 Parameters 1990 ---------- 1991 ifile : PathLike 1992 The path to the HDF5 (.h5) file. 1993 dimensionality : int 1994 The expected number of dimensions of the dataset. 1995 dataset_id : str, optional 1996 The identifier of the dataset whose scales are read. 1997 If ``None``, the default PSI dataset (``'Data'``) is used. 1998 1999 Returns 2000 ------- 2001 out : tuple[np.ndarray, ...] 2002 The coordinate scale arrays for each dimension. 2003 2004 Raises 2005 ------ 2006 ValueError 2007 If the dataset dimensionality does not match ``dimensionality``. 2008 ValueError 2009 If any dimension has no associated scale. 
2010 2011 Examples 2012 -------- 2013 >>> from psi_io.psi_io import _get_scales_nd_h5 2014 >>> from psi_io.data import get_3d_data 2015 >>> r, t, p = _get_scales_nd_h5(get_3d_data(), dimensionality=3) 2016 >>> r.shape, t.shape, p.shape 2017 ((151,), (100,), (181,)) 2018 """ 2019 with h5.File(ifile, 'r') as hdf: 2020 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2021 ndim = data.ndim 2022 if ndim != dimensionality: 2023 err = f'Expected {dimensionality}D data, got {ndim}D data instead.' 2024 raise ValueError(err) 2025 scales = [] 2026 for dim in data.dims: 2027 if dim: 2028 scales.append(dim[0][:]) 2029 else: 2030 raise ValueError(f'Dimension has no scale associated with it.') 2031 return tuple(scales) 2032 2033 2034def _get_scales_nd_h4(ifile: Union[ Path, str], /, 2035 dimensionality: int, 2036 dataset_id: Optional[str] = None, 2037 ): 2038 """HDF4 (.hdf) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`. 2039 2040 Parameters 2041 ---------- 2042 ifile : PathLike 2043 The path to the HDF4 (.hdf) file. 2044 dimensionality : int 2045 The expected number of dimensions of the dataset. 2046 dataset_id : str, optional 2047 The identifier of the dataset whose scales are read. 2048 If ``None``, the default PSI dataset (``'Data-Set-2'``) is used. 2049 2050 Returns 2051 ------- 2052 out : tuple[np.ndarray, ...] 2053 The coordinate scale arrays for each dimension. 2054 2055 Raises 2056 ------ 2057 ValueError 2058 If the dataset dimensionality does not match ``dimensionality``. 2059 ValueError 2060 If any dimension has no associated scale. 
2061 2062 Examples 2063 -------- 2064 >>> from psi_io.psi_io import _get_scales_nd_h4 2065 >>> from psi_io.data import get_3d_data 2066 >>> r, t, p = _get_scales_nd_h4(get_3d_data(hdf=".hdf"), dimensionality=3) # doctest: +SKIP 2067 >>> r.shape # doctest: +SKIP 2068 (151,) 2069 """ 2070 hdf = h4.SD(str(ifile)) 2071 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2072 ndim = data.info()[1] 2073 if ndim != dimensionality: 2074 err = f'Expected {dimensionality}D data, got {ndim}D data instead.' 2075 raise ValueError(err) 2076 scales = [] 2077 for k_, v_ in reversed(data.dimensions(full=1).items()): 2078 if v_[3]: 2079 scales.append(hdf.select(k_)[:]) 2080 else: 2081 raise ValueError('Dimension has no scale associated with it.') 2082 return tuple(scales) 2083 2084 2085def _read_h5_meta(ifile: PathLike, /, 2086 dataset_id: Optional[str] = None 2087 ): 2088 """HDF5 (.h5) version of :func:`read_hdf_meta`. 2089 2090 Examples 2091 -------- 2092 >>> from psi_io.psi_io import _read_h5_meta 2093 >>> from psi_io.data import get_3d_data 2094 >>> meta = _read_h5_meta(get_3d_data()) 2095 >>> meta[0].name 2096 'Data' 2097 """ 2098 with h5.File(ifile, 'r') as hdf: 2099 # Raises KeyError if ``dataset_id`` not found 2100 # If ``dataset_id`` is None, get all non-scale :class:`h5.Dataset`s 2101 if dataset_id: 2102 datasets = (dataset_id, hdf[dataset_id]), 2103 else: 2104 datasets = ((k, v) for k, v in hdf.items() if not v.is_scale) 2105 2106 # One should avoid multiple calls to ``dimproxy[0]`` – *e.g.* ``dimproxy[0].dtype`` and 2107 # ``dimproxy[0].shape`` – because the __getitem__ method creates and returns a new 2108 # :class:`~h5.DimensionProxy` object each time it is called. [Does this matter? Probably not.] 
2109 return [HdfDataMeta(name=k, 2110 type=v.dtype, 2111 shape=v.shape, 2112 attr=dict(v.attrs), 2113 scales=[HdfScaleMeta(name=dimproxy.label, 2114 type=dim.dtype, 2115 shape=dim.shape, 2116 imin=dim[0], 2117 imax=dim[-1]) 2118 for dimproxy in v.dims if dimproxy and (dim := dimproxy[0])]) 2119 for k, v in datasets] 2120 2121 2122def _read_h4_meta(ifile: PathLike, /, 2123 dataset_id: Optional[str] = None 2124 ): 2125 """HDF4 (.hdf) version of :func:`read_hdf_meta`. 2126 2127 Examples 2128 -------- 2129 >>> from psi_io.psi_io import _read_h4_meta 2130 >>> from psi_io.data import get_3d_data 2131 >>> meta = _read_h4_meta(get_3d_data(hdf=".hdf")) # doctest: +SKIP 2132 >>> meta[0].name # doctest: +SKIP 2133 'Data-Set-2' 2134 """ 2135 hdf = h4.SD(str(ifile)) 2136 # Raises HDF4Error if ``dataset_id`` not found 2137 # If ``dataset_id`` is None, get all non-scale :class:`pyhdf.SD.SDS`s 2138 if dataset_id: 2139 datasets = (dataset_id, hdf.select(dataset_id)), 2140 else: 2141 datasets = ((k, hdf.select(k)) for k in hdf.datasets().keys() if not hdf.select(k).iscoordvar()) 2142 2143 # The inner list comprehension differs in approach from the HDF5 version because calling 2144 # ``dimensions(full=1)`` on an :class:`~pyhdf.SD.SDS` returns a dictionary of dimension 2145 # dataset identifiers (keys) and tuples containing dimension metadata (values). Even if no 2146 # coordinate-variable datasets are defined, this dictionary is still returned; the only 2147 # indication that the datasets returned do not exist is that the "type" field (within the 2148 # tuple of dimension metadata) is set to 0. 2149 2150 # Also, one cannot avoid multiple calls to ``hdf.select(k_)`` within the inner list comprehension 2151 # because :class:`~pyhdf.SD.SDS` objects do not define a ``__bool__`` method, and the fallback 2152 # behavior of Python is to assess if the __len__ method returns a non-zero value (which, in 2153 # this case, always returns 0). 
2154 return [HdfDataMeta(name=k, 2155 type=SDC_TYPE_CONVERSIONS[v.info()[3]], 2156 shape=_cast_shape_tuple(v.info()[2]), 2157 attr=v.attributes(), 2158 scales=[HdfScaleMeta(name=k_, 2159 type=SDC_TYPE_CONVERSIONS[v_[3]], 2160 shape=_cast_shape_tuple(v_[0]), 2161 imin=hdf.select(k_)[0], 2162 imax=hdf.select(k_)[-1]) 2163 for k_, v_ in v.dimensions(full=1).items() if v_[3]]) 2164 for k, v in datasets] 2165 2166 2167def _read_h5_rtp(ifile: Union[ Path, str], /): 2168 """HDF5 (.h5) version of :func:`read_rtp_meta`. 2169 2170 Examples 2171 -------- 2172 >>> from psi_io.psi_io import _read_h5_rtp 2173 >>> from psi_io.data import get_3d_data 2174 >>> meta = _read_h5_rtp(get_3d_data()) 2175 >>> sorted(meta.keys()) 2176 ['p', 'r', 't'] 2177 """ 2178 with h5.File(ifile, 'r') as hdf: 2179 return {k: (hdf[v].size, hdf[v][0], hdf[v][-1]) 2180 for k, v in zip('rtp', PSI_SCALE_ID['h5'])} 2181 2182 2183def _read_h4_rtp(ifile: Union[ Path, str], /): 2184 """HDF4 (.hdf) version of :func:`read_rtp_meta`. 2185 2186 Examples 2187 -------- 2188 >>> from psi_io.psi_io import _read_h4_rtp 2189 >>> from psi_io.data import get_3d_data 2190 >>> meta = _read_h4_rtp(get_3d_data(hdf=".hdf")) # doctest: +SKIP 2191 >>> sorted(meta.keys()) # doctest: +SKIP 2192 ['p', 'r', 't'] 2193 """ 2194 hdf = h4.SD(str(ifile)) 2195 return {k: (hdf.select(v).info()[2], hdf.select(v)[0], hdf.select(v)[-1]) 2196 for k, v in zip('ptr', PSI_SCALE_ID['h4'])} 2197 2198 2199def _read_h5_data(ifile: PathLike, /, 2200 dataset_id: Optional[str] = None, 2201 return_scales: bool = True, 2202 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2203 """HDF5 (.h5) version of :func:`read_hdf_data`. 
2204 2205 Examples 2206 -------- 2207 >>> from psi_io.psi_io import _read_h5_data 2208 >>> from psi_io.data import get_3d_data 2209 >>> data, r, t, p = _read_h5_data(get_3d_data()) 2210 >>> data.shape 2211 (181, 100, 151) 2212 """ 2213 with h5.File(ifile, 'r') as hdf: 2214 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2215 dataset = data[:] 2216 if return_scales: 2217 scales = [dim[0][:] for dim in data.dims if dim] 2218 return dataset, *scales 2219 return dataset 2220 2221 2222def _read_h4_data(ifile: PathLike, /, 2223 dataset_id: Optional[str] = None, 2224 return_scales: bool = True, 2225 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2226 """HDF4 (.hdf) version of :func:`read_hdf_data`. 2227 2228 Examples 2229 -------- 2230 >>> from psi_io.psi_io import _read_h4_data 2231 >>> from psi_io.data import get_3d_data 2232 >>> data, *_ = _read_h4_data(get_3d_data(hdf=".hdf")) # doctest: +SKIP 2233 >>> data.shape # doctest: +SKIP 2234 (181, 100, 151) 2235 """ 2236 hdf = h4.SD(str(ifile)) 2237 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2238 if return_scales: 2239 out = (data[:], 2240 *[hdf.select(k_)[:] for k_, v_ in reversed(data.dimensions(full=1).items()) if v_[3]]) 2241 else: 2242 out = data[:] 2243 return out 2244 2245 2246def _read_h5_by_index(ifile: PathLike, /, 2247 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None], 2248 dataset_id: Optional[str] = None, 2249 return_scales: bool = True, 2250 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2251 """HDF5 (.h5) version of :func:`read_hdf_by_index`. 
2252 2253 Examples 2254 -------- 2255 >>> from psi_io.psi_io import _read_h5_by_index 2256 >>> from psi_io.data import get_3d_data 2257 >>> f, r, t, p = _read_h5_by_index(get_3d_data(), 0, None, None) 2258 >>> f.shape 2259 (181, 100, 1) 2260 """ 2261 with h5.File(ifile, 'r') as hdf: 2262 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2263 if len(xi) != data.ndim: 2264 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2265 slices = [_parse_index_inputs(slice_input) for slice_input in xi] 2266 dataset = data[tuple(reversed(slices))] 2267 if return_scales: 2268 scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim] 2269 return dataset, *scales 2270 return dataset 2271 2272def _read_h4_by_index(ifile: PathLike, /, 2273 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None], 2274 dataset_id: Optional[str] = None, 2275 return_scales: bool = True, 2276 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2277 """HDF4 (.hdf) version of :func:`read_hdf_by_index`. 
2278 2279 Examples 2280 -------- 2281 >>> from psi_io.psi_io import _read_h4_by_index 2282 >>> from psi_io.data import get_3d_data 2283 >>> f, r, t, p = _read_h4_by_index(get_3d_data(hdf=".hdf"), 0, None, None) # doctest: +SKIP 2284 >>> f.shape # doctest: +SKIP 2285 (181, 100, 1) 2286 """ 2287 hdf = h4.SD(str(ifile)) 2288 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2289 ndim = data.info()[1] 2290 if len(xi) != ndim: 2291 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2292 slices = [_parse_index_inputs(slice_input) for slice_input in xi] 2293 dataset = data[tuple(reversed(slices))] 2294 if return_scales: 2295 scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]] 2296 return dataset, *scales 2297 return dataset 2298 2299 2300def _read_h5_by_value(ifile: PathLike, /, 2301 *xi: Union[float, Tuple[float, float], None], 2302 dataset_id: Optional[str] = None, 2303 return_scales: bool = True, 2304 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2305 """HDF5 (.h5) version of :func:`read_hdf_by_value`. 
2306 2307 Examples 2308 -------- 2309 >>> from psi_io.psi_io import _read_h5_by_value 2310 >>> from psi_io.data import get_3d_data 2311 >>> f, r, t, p = _read_h5_by_value(get_3d_data(), 15, None, None) 2312 >>> f.shape 2313 (181, 100, 2) 2314 """ 2315 with h5.File(ifile, 'r') as hdf: 2316 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2317 if len(xi) != data.ndim: 2318 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2319 slices = [] 2320 for dimproxy, value in zip(data.dims, xi): 2321 if dimproxy: 2322 slices.append(_parse_value_inputs(dimproxy[0], value)) 2323 elif value is None: 2324 slices.append(slice(None)) 2325 else: 2326 raise ValueError("Cannot slice by value on dimension without scales") 2327 dataset = data[tuple(reversed(slices))] 2328 if return_scales: 2329 scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim] 2330 return dataset, *scales 2331 return dataset 2332 2333 2334def _read_h4_by_value(ifile: PathLike, /, 2335 *xi: Union[float, Tuple[float, float], None], 2336 dataset_id: Optional[str] = None, 2337 return_scales: bool = True, 2338 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2339 """HDF4 (.hdf) version of :func:`read_hdf_by_value`. 
2340 2341 Examples 2342 -------- 2343 >>> from psi_io.psi_io import _read_h4_by_value 2344 >>> from psi_io.data import get_3d_data 2345 >>> f, r, t, p = _read_h4_by_value(get_3d_data(hdf=".hdf"), 15, None, None) # doctest: +SKIP 2346 >>> f.shape # doctest: +SKIP 2347 (181, 100, 2) 2348 """ 2349 hdf = h4.SD(str(ifile)) 2350 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2351 ndim = data.info()[1] 2352 if len(xi) != ndim: 2353 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2354 slices = [] 2355 for (k_, v_), value in zip(reversed(data.dimensions(full=1).items()), xi): 2356 if v_[3] != 0: 2357 slices.append(_parse_value_inputs(hdf.select(k_), value)) 2358 elif value is None: 2359 slices.append(slice(None)) 2360 else: 2361 raise ValueError("Cannot slice by value on dimension without scales") 2362 dataset = data[tuple(reversed(slices))] 2363 if return_scales: 2364 scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]] 2365 return dataset, *scales 2366 return dataset 2367 2368 2369def _read_h5_by_ivalue(ifile: PathLike, /, 2370 *xi: Union[float, Tuple[float, float], None], 2371 dataset_id: Optional[str] = None, 2372 return_scales: bool = True, 2373 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2374 """HDF5 (.h5) version of :func:`read_hdf_by_ivalue`. 
2375 2376 Examples 2377 -------- 2378 >>> from psi_io.psi_io import _read_h5_by_ivalue 2379 >>> from psi_io.data import get_3d_data 2380 >>> f, r_idx, t_idx, p_idx = _read_h5_by_ivalue(get_3d_data(), 2.7, None, None) 2381 >>> r_idx.shape 2382 (2,) 2383 """ 2384 with h5.File(ifile, 'r') as hdf: 2385 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2386 if len(xi) != data.ndim: 2387 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2388 slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(data.shape), xi)] 2389 dataset = data[tuple(reversed(slices))] 2390 if return_scales: 2391 scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(data.shape))] 2392 return dataset, *scales 2393 return dataset 2394 2395 2396def _read_h4_by_ivalue(ifile: PathLike, /, 2397 *xi: Union[float, Tuple[float, float], None], 2398 dataset_id: Optional[str] = None, 2399 return_scales: bool = True, 2400 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2401 """HDF4 (.hdf) version of :func:`read_hdf_by_ivalue`. 
2402 2403 Examples 2404 -------- 2405 >>> from psi_io.psi_io import _read_h4_by_ivalue 2406 >>> from psi_io.data import get_3d_data 2407 >>> f, r_idx, t_idx, p_idx = _read_h4_by_ivalue(get_3d_data(hdf=".hdf"), 2.7, None, None) # doctest: +SKIP 2408 >>> r_idx.shape # doctest: +SKIP 2409 (2,) 2410 """ 2411 hdf = h4.SD(str(ifile)) 2412 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2413 ndim, shape = data.info()[1], _cast_shape_tuple(data.info()[2]) 2414 if len(xi) != ndim: 2415 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2416 slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(shape), xi)] 2417 dataset = data[tuple(reversed(slices))] 2418 if return_scales: 2419 scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(shape))] 2420 return dataset, *scales 2421 return dataset 2422 2423 2424def _write_h4_data(ifile: PathLike, /, 2425 data: np.ndarray, 2426 *scales: Sequence[np.ndarray], 2427 dataset_id: Optional[str] = None, 2428 sync_dtype: bool = False, 2429 strict: bool = True, 2430 **kwargs) -> Path: 2431 """HDF4 (.hdf) version of :func:`write_hdf_data`. 2432 2433 Examples 2434 -------- 2435 >>> import tempfile, numpy as np 2436 >>> from pathlib import Path 2437 >>> from psi_io.psi_io import _write_h4_data, _read_h4_data 2438 >>> f = np.ones((10,), dtype=np.float32) 2439 >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32) 2440 >>> with tempfile.TemporaryDirectory() as d: # doctest: +SKIP 2441 ... _write_h4_data(Path(d) / "out.hdf", f, x) # doctest: +SKIP 2442 ... data, *_ = _read_h4_data(Path(d) / "out.hdf") # doctest: +SKIP 2443 ... 
data.shape # doctest: +SKIP 2444 (10,) 2445 """ 2446 dataid = dataset_id or PSI_DATA_ID['h4'] 2447 h4file = h4.SD(str(ifile), h4.SDC.WRITE | h4.SDC.CREATE | h4.SDC.TRUNC) 2448 sds_id = h4file.create(dataid, _dtype_to_sdc(data.dtype), data.shape) 2449 2450 if scales: 2451 for i, scale in enumerate(reversed(scales)): 2452 if scale is not None: 2453 if sync_dtype: 2454 scale = scale.astype(data.dtype) 2455 sds_id.dim(i).setscale(_dtype_to_sdc(scale.dtype), scale.tolist()) 2456 2457 if kwargs: 2458 for k, v in kwargs.items(): 2459 npv = np.asarray(v) 2460 attr_ = sds_id.attr(k) 2461 try: 2462 attr_.set(_dtype_to_sdc(npv.dtype), npv.tolist()) 2463 except KeyError as e: 2464 if strict: 2465 raise KeyError(f"Failed to set attribute '{k}' on dataset '{dataid}'") from e 2466 else: 2467 print(f"Warning: Failed to set attribute '{k}' on dataset '{dataid}'; skipping.") 2468 2469 sds_id.set(data) 2470 sds_id.endaccess() 2471 h4file.end() 2472 2473 return ifile 2474 2475 2476def _write_h5_data(ifile: PathLike, /, 2477 data: np.ndarray, 2478 *scales: Sequence[np.ndarray], 2479 dataset_id: Optional[str] = None, 2480 sync_dtype: bool = False, 2481 strict: bool = True, 2482 **kwargs) -> Path: 2483 """HDF5 (.h5) version of :func:`write_hdf_data`. 2484 2485 Examples 2486 -------- 2487 >>> import tempfile, numpy as np 2488 >>> from pathlib import Path 2489 >>> from psi_io.psi_io import _write_h5_data, _read_h5_data 2490 >>> f = np.ones((10,), dtype=np.float32) 2491 >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32) 2492 >>> with tempfile.TemporaryDirectory() as d: 2493 ... _write_h5_data(Path(d) / "out.h5", f, x) 2494 ... data, *_ = _read_h5_data(Path(d) / "out.h5") 2495 ... 
data.shape 2496 (10,) 2497 """ 2498 dataid = dataset_id or PSI_DATA_ID['h5'] 2499 with h5.File(ifile, "w") as h5file: 2500 dataset = h5file.create_dataset(dataid, data=data, dtype=data.dtype, shape=data.shape) 2501 2502 if scales: 2503 for i, scale in enumerate(scales): 2504 if scale is not None: 2505 if sync_dtype: 2506 scale = scale.astype(data.dtype) 2507 h5file.create_dataset(f"dim{i+1}", data=scale, dtype=scale.dtype, shape=scale.shape) 2508 h5file[dataid].dims[i].attach_scale(h5file[f"dim{i+1}"]) 2509 h5file[dataid].dims[i].label = f"dim{i+1}" 2510 2511 if kwargs: 2512 for key, value in kwargs.items(): 2513 try: 2514 dataset.attrs[key] = value 2515 except TypeError as e: 2516 if strict: 2517 raise TypeError(f"Failed to set attribute '{key}' on dataset '{dataid}'") from e 2518 else: 2519 print(f"Warning: Failed to set attribute '{key}' on dataset '{dataid}'; skipping.") 2520 2521 return ifile 2522 2523 2524def _np_linear_interpolation(xi: Sequence, scales: Sequence, values: np.ndarray): 2525 """ 2526 Perform linear interpolation over one dimension. 2527 2528 Parameters 2529 ---------- 2530 xi : list[float | None] 2531 Target coordinate values for each dimension; ``None`` marks a 2532 "free" (not interpolated) dimension. 2533 scales : list[np.ndarray] 2534 Coordinate scale arrays, one per dimension. 2535 values : np.ndarray 2536 The data array to interpolate. 2537 2538 Returns 2539 ------- 2540 out : np.ndarray 2541 The interpolated data array. 
2542 2543 Examples 2544 -------- 2545 >>> import numpy as np 2546 >>> from psi_io.psi_io import _np_linear_interpolation 2547 >>> scales = [np.array([0.0, 1.0])] 2548 >>> values = np.array([0.0, 2.0]) 2549 >>> float(_np_linear_interpolation([0.5], scales, values)) 2550 1.0 2551 """ 2552 index0 = next((i for i, v in enumerate(xi) if v is not None), None) 2553 t = (xi[index0] - scales[index0][0])/(scales[index0][1] - scales[index0][0]) 2554 f0 = [slice(None, None)]*values.ndim 2555 f1 = [slice(None, None)]*values.ndim 2556 f0[index0] = 0 2557 f1[index0] = 1 2558 2559 return (1 - t)*values[tuple(f0)] + t*values[tuple(f1)] 2560 2561 2562def _np_bilinear_interpolation(xi, scales, values): 2563 """ 2564 Perform bilinear interpolation over two dimensions. 2565 2566 Parameters 2567 ---------- 2568 xi : Sequence[float | None] 2569 Target coordinate values for each dimension; ``None`` marks a 2570 "free" (not interpolated) dimension. 2571 scales : Sequence[np.ndarray] 2572 Coordinate scale arrays, one per dimension. 2573 values : np.ndarray 2574 The data array to interpolate. 2575 2576 Returns 2577 ------- 2578 out : np.ndarray 2579 The interpolated data array. 
2580 2581 Examples 2582 -------- 2583 >>> import numpy as np 2584 >>> from psi_io.psi_io import _np_bilinear_interpolation 2585 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0])] 2586 >>> values = np.array([[0.0, 0.0], [1.0, 2.0]]) 2587 >>> float(_np_bilinear_interpolation([0.5, 0.5], scales, values)) 2588 0.75 2589 """ 2590 index0, index1 = [i for i, v in enumerate(xi) if v is not None] 2591 t, u = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1)] 2592 2593 f00 = [slice(None, None)]*values.ndim 2594 f10 = [slice(None, None)]*values.ndim 2595 f01 = [slice(None, None)]*values.ndim 2596 f11 = [slice(None, None)]*values.ndim 2597 f00[index0], f00[index1] = 0, 0 2598 f10[index0], f10[index1] = 1, 0 2599 f01[index0], f01[index1] = 0, 1 2600 f11[index0], f11[index1] = 1, 1 2601 2602 return ( 2603 (1 - t)*(1 - u)*values[tuple(f00)] + 2604 t*(1 - u)*values[tuple(f10)] + 2605 (1 - t)*u*values[tuple(f01)] + 2606 t*u*values[tuple(f11)] 2607 ) 2608 2609 2610def _np_trilinear_interpolation(xi, scales, values): 2611 """ 2612 Perform trilinear interpolation over three dimensions. 2613 2614 Parameters 2615 ---------- 2616 xi : list[float | None] 2617 Target coordinate values for each dimension; ``None`` marks a 2618 "free" (not interpolated) dimension. 2619 scales : list[np.ndarray] 2620 Coordinate scale arrays, one per dimension. 2621 values : np.ndarray 2622 The data array to interpolate. 2623 2624 Returns 2625 ------- 2626 out : np.ndarray 2627 The interpolated data array. 
2628 2629 Examples 2630 -------- 2631 >>> import numpy as np 2632 >>> from psi_io.psi_io import _np_trilinear_interpolation 2633 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0]), np.array([0.0, 1.0])] 2634 >>> values = np.zeros((2, 2, 2)) 2635 >>> values[1, 1, 1] = 8.0 2636 >>> float(_np_trilinear_interpolation([0.5, 0.5, 0.5], scales, values)) 2637 1.0 2638 """ 2639 index0, index1, index2 = [i for i, v in enumerate(xi) if v is not None] 2640 t, u, v = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1, index2)] 2641 2642 f000 = [slice(None, None)]*values.ndim 2643 f100 = [slice(None, None)]*values.ndim 2644 f010 = [slice(None, None)]*values.ndim 2645 f110 = [slice(None, None)]*values.ndim 2646 f001 = [slice(None, None)]*values.ndim 2647 f101 = [slice(None, None)]*values.ndim 2648 f011 = [slice(None, None)]*values.ndim 2649 f111 = [slice(None, None)]*values.ndim 2650 2651 f000[index0], f000[index1], f000[index2] = 0, 0, 0 2652 f100[index0], f100[index1], f100[index2] = 1, 0, 0 2653 f010[index0], f010[index1], f010[index2] = 0, 1, 0 2654 f110[index0], f110[index1], f110[index2] = 1, 1, 0 2655 f001[index0], f001[index1], f001[index2] = 0, 0, 1 2656 f101[index0], f101[index1], f101[index2] = 1, 0, 1 2657 f011[index0], f011[index1], f011[index2] = 0, 1, 1 2658 f111[index0], f111[index1], f111[index2] = 1, 1, 1 2659 2660 c00 = values[tuple(f000)]*(1 - t) + values[tuple(f100)]*t 2661 c10 = values[tuple(f010)]*(1 - t) + values[tuple(f110)]*t 2662 c01 = values[tuple(f001)]*(1 - t) + values[tuple(f101)]*t 2663 c11 = values[tuple(f011)]*(1 - t) + values[tuple(f111)]*t 2664 2665 c0 = c00*(1 - u) + c10*u 2666 c1 = c01*(1 - u) + c11*u 2667 2668 return c0*(1 - v) + c1*v 2669 2670 2671def _check_index_ranges(arr_size: int, 2672 i0: Union[int, np.integer], 2673 i1: Union[int, np.integer] 2674 ) -> Tuple[int, int]: 2675 """ 2676 Adjust index ranges to ensure they cover at least two indices. 
2677 2678 Parameters 2679 ---------- 2680 arr_size : int 2681 The size of the array along the dimension. 2682 i0 : int 2683 The starting index. 2684 i1 : int 2685 The ending index. 2686 2687 Returns 2688 ------- 2689 out : tuple[int, int] 2690 Adjusted starting and ending indices. 2691 2692 Notes 2693 ----- 2694 This function ensures that the range between `i0` and `i1` includes at least 2695 two indices for interpolation purposes. 2696 2697 Examples 2698 -------- 2699 >>> from psi_io.psi_io import _check_index_ranges 2700 >>> _check_index_ranges(10, 0, 0) 2701 (0, 2) 2702 >>> _check_index_ranges(10, 5, 5) 2703 (4, 6) 2704 >>> _check_index_ranges(10, 10, 10) 2705 (8, 10) 2706 """ 2707 i0, i1 = int(i0), int(i1) 2708 if i0 == 0: 2709 return (i0, i1 + 2) if i1 == 0 else (i0, i1 + 1) 2710 elif i0 == arr_size: 2711 return i0 - 2, i1 2712 else: 2713 return i0 - 1, i1 + 1 2714 2715 2716def _cast_shape_tuple(input: Union[int, Sequence[int]] 2717 ) -> tuple[int, ...]: 2718 """ 2719 Cast an input to a tuple of integers. 2720 2721 Parameters 2722 ---------- 2723 input : int | Sequence[int] 2724 The input to cast. 2725 2726 Returns 2727 ------- 2728 out : tuple[int, ...] 2729 The input cast as a tuple of integers. 2730 2731 Raises 2732 ------ 2733 TypeError 2734 If the input is neither an integer nor an iterable of integers. 2735 2736 Examples 2737 -------- 2738 >>> from psi_io.psi_io import _cast_shape_tuple 2739 >>> _cast_shape_tuple(5) 2740 (5,) 2741 >>> _cast_shape_tuple([3, 4, 5]) 2742 (3, 4, 5) 2743 """ 2744 if isinstance(input, int): 2745 return (input,) 2746 elif isinstance(input, Sequence): 2747 return tuple(int(i) for i in input) 2748 else: 2749 raise TypeError("Input must be an integer or an iterable of integers.") 2750 2751 2752def _parse_index_inputs(input: Union[int, slice, Sequence[Union[int, None]], None] 2753 ) -> slice: 2754 """ 2755 Parse various slice input formats into a standard slice object. 
2756 2757 Parameters 2758 ---------- 2759 input : int | slice | Sequence[int | None] | None 2760 The index specification to parse: 2761 2762 - *int* — selects a single-element slice ``[n, n+1)``. 2763 - *Sequence[int | None]* — unpacked directly into :class:`slice`. 2764 - ``None`` — selects the entire dimension. 2765 2766 Returns 2767 ------- 2768 slice 2769 The parsed slice object. 2770 2771 Raises 2772 ------ 2773 TypeError 2774 If the input type is unsupported. 2775 2776 Examples 2777 -------- 2778 >>> from psi_io.psi_io import _parse_index_inputs 2779 >>> _parse_index_inputs(3) 2780 slice(3, 4, None) 2781 >>> _parse_index_inputs((2, 7)) 2782 slice(2, 7, None) 2783 >>> _parse_index_inputs(None) 2784 slice(None, None, None) 2785 """ 2786 if isinstance(input, int): 2787 return slice(input, input + 1) 2788 elif isinstance(input, Sequence): 2789 return slice(*input) 2790 elif input is None: 2791 return slice(None) 2792 else: 2793 raise TypeError("Unsupported input type for slicing.") 2794 2795 2796def _parse_value_inputs(dimproxy, 2797 value, 2798 scale_exists: bool = True 2799 ) -> slice: 2800 """Parse a scale value or value range into a slice over a dimension's coordinate array. 2801 2802 Parameters 2803 ---------- 2804 dimproxy : array-like 2805 The coordinate array (scale) for the dimension, supporting ``[:]`` indexing. 2806 value : float | tuple[float | None, float | None] | None 2807 The target value or range: 2808 2809 - ``None`` — select the entire dimension. 2810 - *float* — find the 2-element bracket ``[a, b]`` such that ``a <= value < b``. 2811 - *(float, float)* — find the bracket spanning the given range. 2812 scale_exists : bool 2813 Guard flag; raises :exc:`ValueError` if ``False`` and ``value`` is not ``None``. 2814 2815 Returns 2816 ------- 2817 slice 2818 A slice object that selects the appropriate indices from the coordinate array. 2819 2820 Raises 2821 ------ 2822 ValueError 2823 If ``value`` is not ``None`` and ``scale_exists`` is ``False``. 
2824 2825 Examples 2826 -------- 2827 >>> import numpy as np 2828 >>> from psi_io.psi_io import _parse_value_inputs 2829 >>> scale = np.linspace(0.0, 10.0, 11) 2830 >>> _parse_value_inputs(scale, None) 2831 slice(None, None, None) 2832 >>> _parse_value_inputs(scale, 4.5) 2833 slice(4, 6, None) 2834 >>> _parse_value_inputs(scale, (2.0, 7.0)) 2835 slice(1, 8, None) 2836 """ 2837 if value is None: 2838 return slice(None) 2839 if not scale_exists: 2840 raise ValueError("Cannot parse value inputs when scale does not exist.") 2841 dim = dimproxy[:] 2842 if not isinstance(value, Sequence): 2843 insert_index = np.searchsorted(dim, value) 2844 return slice(*_check_index_ranges(dim.size, insert_index, insert_index)) 2845 else: 2846 temp_range = list(value) 2847 if temp_range[0] is None: 2848 temp_range[0] = -np.inf 2849 if temp_range[-1] is None: 2850 temp_range[-1] = np.inf 2851 insert_indices = np.searchsorted(dim, temp_range) 2852 return slice(*_check_index_ranges(dim.size, *insert_indices)) 2853 2854 2855def _parse_ivalue_inputs(dimsize, 2856 input: Union[Union[int, float], slice, Sequence[Union[Union[int, float], None]], None] 2857 ) -> slice: 2858 """ 2859 Parse a sub-index value or range into a slice over a dimension of a given size. 2860 2861 Parameters 2862 ---------- 2863 dimsize : int 2864 The size of the array along the dimension; used to clamp the resulting slice. 2865 input : int | float | Sequence[int | float | None] | None 2866 The sub-index specification to parse: 2867 2868 - ``None`` — select the entire dimension. 2869 - *int* or *float* (:math:`a`) — returns ``slice(floor(a), ceil(a))`` 2870 (clamped to ``[0, dimsize - 1]``). 2871 - *Sequence* ``(a, b)`` — returns ``slice(floor(a), ceil(b))`` 2872 (clamped to ``[0, dimsize - 1]``). 2873 2874 Returns 2875 ------- 2876 slice 2877 The parsed slice object, guaranteed to span at least 2 elements. 2878 2879 Raises 2880 ------ 2881 TypeError 2882 If the input type is unsupported. 
2883 2884 Examples 2885 -------- 2886 >>> from psi_io.psi_io import _parse_ivalue_inputs 2887 >>> _parse_ivalue_inputs(10, None) 2888 slice(None, None, None) 2889 >>> _parse_ivalue_inputs(10, 2.7) 2890 slice(2, 4, None) 2891 >>> _parse_ivalue_inputs(10, (1.3, 4.8)) 2892 slice(1, 5, None) 2893 """ 2894 if input is None: 2895 return slice(None) 2896 elif isinstance(input, (int, float)): 2897 i0, i1 = math.floor(input), math.ceil(input) 2898 elif isinstance(input, Sequence): 2899 i0, i1 = math.floor(input[0]), math.ceil(input[1]) 2900 else: 2901 raise TypeError("Unsupported input type for slicing.") 2902 2903 if i0 > i1: 2904 i0, i1 = i1, i0 2905 i0, i1 = max(0, i0), min(dimsize - 1, i1) 2906 if i0 > i1: 2907 i0, i1 = i1, i0 2908 i0, i1 = max(0, i0), min(dimsize - 1, i1) 2909 if (i1 - i0) < 2: 2910 return slice(i0, i1 + 2 - (i1-i0)) 2911 else: 2912 return slice(i0, i1)