# Source code for clapper.config

# SPDX-FileCopyrightText: Copyright © 2022 Idiap Research Institute <contact@idiap.ch>
# SPDX-FileContributor: Amir Mohammadi  <amir.mohammadi@idiap.ch>
#
# SPDX-License-Identifier: BSD-3-Clause

"""Functionality to implement python-based config file parsing and loading."""

import importlib.util
import logging
import pathlib
import types
import typing

from importlib.metadata import EntryPoint, entry_points

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Every module object created by ``load`` (the initial context and one module
# per configuration file) is appended to this list, so a reference to each of
# them is held for as long as this module is alive. Nothing in this file ever
# removes entries from it.
_LOADED_CONFIGS: list[types.ModuleType] = []
"""Small gambiarra (https://www.urbandictionary.com/define.php?term=Gambiarra)
to avoid the garbage collector to collect some already imported modules."""


def _load_context(path: str, mod: types.ModuleType) -> types.ModuleType:
    """Execute a Python file inside the namespace of an existing module.

    The file is never imported through the regular import machinery: its bytes
    are read into memory, compiled, and executed using the dictionary of
    ``mod`` as global namespace. Names already present in ``mod`` are therefore
    visible to the executed code, and names the code defines (or overrides)
    end up in ``mod``.

    Note that the file contents are executed as-is; only load files you trust.

    Parameters
    ----------
    path
        The full path of the Python file whose contents are executed. It is
        also used as the file name recorded in the compiled code object.
    mod
        A pre-existing module that provides the starting context. A fresh one
        can be prepared with :py:mod:`types`:

        .. code-block:: python

           ctxt: dict[str, typing.Any] = {}
           m = types.ModuleType("name")
           m.__dict__.update(ctxt)

        where ``ctxt`` maps variable names to the objects that should be
        available to the file being loaded.

    Returns
    -------
        The very same ``mod`` object, now holding the resolved context.
    """
    source = pathlib.Path(path).read_bytes()
    # compile separately so tracebacks point at ``path``
    code = compile(source, path, "exec")
    exec(code, vars(mod))
    return mod


def _get_module_filename(module_name: str) -> str | None:
    """Resolve a module name to an actual Python file.

    This function will return the path to the file containing the module named
    at ``module_name``.  Values for this parameter are dot-separated module
    names such as ``expose.config``.


    Arguments:

        module_name: The name of the module to search


    Returns
    -------
        The path that corresponds to file implementing the provided module name
    """
    try:
        module_spec = importlib.util.find_spec(module_name)
        if module_spec is None:
            return None
        return module_spec.origin
    except (ModuleNotFoundError,):
        return None


def _object_name(
    path: str | pathlib.Path, common_name: str | None
) -> tuple[str, str | None]:
    if isinstance(path, pathlib.Path):
        path = str(path)

    r = path.rsplit(":", 1)
    return r[0], (common_name if len(r) < 2 else r[1])


def _resolve_entry_point_or_modules(
    paths: list[str | pathlib.Path],
    entry_point_group: str | None = None,
    common_name: str | None = None,
) -> tuple[list[str], list[str], list[str]]:
    """Resolve a mixture of paths, entry point names, and module names to
    file system paths.

    Each item may be an actual file system path, the name of an entry point
    registered under ``entry_point_group``, or a dotted module name, e.g.
    ``["/tmp/config.py", "my-config", "expose.config"]``. Items are tried in
    that order: existing file first, then entry point name, then module name.
    Any item may carry a ``:object_name`` suffix.

    Parameters
    ----------
    paths
        An iterable of strings (or paths) that either point to actual files,
        are entry point names, or are module names.
    entry_point_group
        The entry point group name to search in entry points. When ``None``,
        no entry point lookup is performed.
    common_name
        It will be used as a default name for object names. See the
        ``attribute_name`` parameter from :py:func:`load`.

    Returns
    -------
        A tuple containing three lists, all with one item per input path:

        * The resolved paths pointing to existing files
        * The python module names to bind each of the files to
          (``"user_config"`` for plain files and module names, the entry
          point's module for entry points), and finally,
        * The name of objects that are supposed to be picked from paths
          (``None`` when neither the item nor ``common_name`` provides one)

    Raises
    ------
    ValueError
        If one of the paths cannot be resolved to an actual path to a file.
    """
    known_entry_points: dict[str, EntryPoint] = {}
    if entry_point_group is not None:
        for candidate in entry_points(group=entry_point_group):
            known_entry_points[candidate.name] = candidate

    found_files = []
    bound_names = []
    picked_objects = []

    for path in paths:
        resolved_path, object_name = _object_name(path, common_name)
        # files given by full path and plain module names share this name
        module_name = "user_config"

        # only items that are not already existing files need resolving
        if not pathlib.Path(resolved_path).is_file():
            entry = known_entry_points.get(resolved_path)

            if entry is None:
                # neither a file nor an entry point: last chance, a module name
                resolved_path = _get_module_filename(resolved_path)
                if not (
                    resolved_path is not None
                    and pathlib.Path(resolved_path).is_file()
                ):
                    raise ValueError(
                        f"The specified path `{path}' is not a file, a entry "
                        f"point name, or a known-module name"
                    )
            else:
                # entry point: it dictates both module and (optionally) object
                module_name = entry.module
                object_name = entry.attr or common_name
                resolved_path = _get_module_filename(module_name)
                if not (
                    resolved_path is not None
                    and pathlib.Path(resolved_path).is_file()
                ):
                    raise ValueError(
                        f"The specified entry point `{path}' pointing to module "
                        f"`{module_name}' and resolved to `{resolved_path}' does "
                        f"not point to an existing file."
                    )

        found_files.append(resolved_path)
        bound_names.append(module_name)
        picked_objects.append(object_name)

    return found_files, bound_names, picked_objects



# [docs]
def load(
    paths: list[str | pathlib.Path],
    context: dict[str, typing.Any] | None = None,
    entry_point_group: str | None = None,
    attribute_name: str | None = None,
) -> types.ModuleType | typing.Any:
    """Load a set of configuration files, in sequence.

    This method will load one or more configuration files. Every time a
    configuration file is loaded, the context (variables) loaded from the
    previous file is made available, so the new configuration file can override
    or modify this context. Variables whose names start with ``__`` are not
    carried over from one file to the next.

    Parameters
    ----------
    paths
        A list or iterable containing paths (relative or absolute) of
        configuration files that need to be loaded in sequence. Items may also
        be entry point names (see ``entry_point_group``) or module names. Each
        configuration file is loaded by creating/modifying the context
        generated after each file readout.
    context
        If provided, start the readout of the first configuration file with the
        given context. Otherwise, create a new internal context. The
        dictionary itself is not modified.
    entry_point_group
        If provided, it will treat non-existing file paths as entry point names
        under the ``entry_point_group`` name.
    attribute_name
        If provided, will look for the ``attribute_name`` variable inside the
        loaded files. Paths ending with ``some_path:variable_name`` can
        override the ``attribute_name``. When not provided, any
        ``:variable_name`` suffix is ignored and the module is returned.

    Returns
    -------
        A module representing the resolved context, after loading the provided
        modules and resolving all variables. If ``attribute_name`` is given,
        the object with the given ``attribute_name`` name (or the name provided
        by user) is returned instead of the module. If ``paths`` is empty, a
        module holding only ``context`` is returned.

    Raises
    ------
    ImportError
        If attribute_name is given but the object does not exist in the paths.
    ValueError
        If one of the paths cannot be resolved to an existing file.
    """

    # resolve entry points to paths
    resolved_paths, names, object_names = _resolve_entry_point_or_modules(
        paths, entry_point_group, attribute_name
    )

    ctxt = types.ModuleType("initial_context")
    if context is not None:
        ctxt.__dict__.update(context)

    # Small gambiarra (https://www.urbandictionary.com/define.php?term=Gambiarra)
    # to avoid the garbage collector to collect some already imported modules.
    _LOADED_CONFIGS.append(ctxt)

    # if no paths are provided, return context
    if not resolved_paths:
        return ctxt

    mod = ctxt
    for config_path, module_name in zip(resolved_paths, names):
        logger.debug("Loading configuration file `%s'...", config_path)
        mod = types.ModuleType(module_name)
        # Do not propagate __ variables (``__name__``, ``__package__``, ...):
        # they belong to the module they were created for. Filtering on a copy
        # leaves the previously loaded module untouched, so objects defined in
        # it keep seeing their own ``__name__``.
        inherited = {
            key: value
            for key, value in ctxt.__dict__.items()
            if not key.startswith("__")
        }
        mod.__dict__.update(inherited)
        _LOADED_CONFIGS.append(mod)
        ctxt = _load_context(config_path, mod)

    if not attribute_name:
        return mod

    # We pick the last object_name here. Normally users should provide just one
    # path when enabling the attribute_name parameter. As ``attribute_name`` is
    # set at this point, every entry of ``object_names`` is a string.
    wanted = object_names[-1]
    if not hasattr(mod, wanted):
        raise ImportError(
            f"The desired variable `{wanted}' does not exist in any of "
            f"your configuration files: {', '.join(resolved_paths)}"
        )

    return getattr(mod, wanted)




# [docs]
def mod_to_context(mod: types.ModuleType) -> dict[str, typing.Any]:
    """Extract the user-visible variables of a module as a dictionary.

    Only "dunder" names, i.e. those that both start *and* end with ``__``
    (such as ``__name__`` or ``__doc__``), are left out. Every other entry of
    the module namespace is kept.

    Parameters
    ----------
    mod
        a Python module, e.g., as returned by :py:func:`load`.

    Returns
    -------
        The context that was in ``mod``, as a dictionary mapping strings to
        objects.
    """
    context: dict[str, typing.Any] = {}
    for name, value in vars(mod).items():
        if name.startswith("__") and name.endswith("__"):
            continue  # dunder entry, not part of the user context
        context[name] = value
    return context




# [docs]
def resource_keys(
    entry_point_group: str,
    exclude_packages: tuple[str, ...] = tuple(),
    strip: tuple[str, ...] = ("dummy",),
) -> list[str]:
    """Read and returns all resources that are registered on a entry-point
    group.

    Entry points from the given ``exclude_packages`` list are ignored.  Notice
    we are using :py:mod:`importlib.metadata` to load entry-points, and that
    that entry point distribution (``.dist`` attribute) was only added to
    Python in version 3.10.  We therefore currently only verify if the named
    resource does not start with any of the strings provided in
    `exclude_package``.

    Parameters
    ----------
    entry_point_group
        The entry point group name.
    exclude_packages
        List of packages to exclude when finding resources.
    strip
        Entrypoint names that start with any value in ``strip`` will be
        ignored.


    Returns
    -------
        Alphabetically sorted list of resources matching your query
    """

    ret_list = [
        k.name
        for k in entry_points(group=entry_point_group)
        if (
            (not k.name.strip().startswith(exclude_packages))
            and (not k.name.startswith(strip))
        )
    ]
    ret_list = list(dict.fromkeys(ret_list))  # order-preserving uniq
    return sorted(ret_list)