Source code for medigan.contribute_model.model_contributor

# -*- coding: utf-8 -*-
# ! /usr/bin/env python
"""Model contributor class that tests models, creates metadata entries, uploads and contributes them to medigan. """

from __future__ import absolute_import

import importlib
import logging
import sys
from pathlib import Path

from ..constants import (
    CONFIG_FILE_KEY_DEPENDENCIES,
    CONFIG_FILE_KEY_EXECUTION,
    CONFIG_FILE_KEY_GENERATE,
    CONFIG_FILE_KEY_GENERATE_NAME,
    CONFIG_FILE_KEY_MODEL_EXTENSION,
    CONFIG_FILE_KEY_MODEL_NAME,
    CONFIG_FILE_KEY_PACKAGE_LINK,
    CONFIG_FILE_KEY_PACKAGE_NAME,
    CONFIG_TEMPLATE_FILE_NAME_AND_EXTENSION,
    CONFIG_TEMPLATE_FILE_URL,
    INIT_PY_FILE,
    TEMPLATE_FOLDER,
)
from ..utils import Utils
from .github_model_uploader import GithubModelUploader
from .zenodo_model_uploader import ZenodoModelUploader


[docs]class ModelContributor:
    """`ModelContributor` class: Contributes a user's local model to the public medigan library

    Parameters
    ----------
    model_id: str
        The generative model's unique id
    init_py_path: str
        The path to the local model's `__init__.py` file needed for importing and running this model.

    Attributes
    ----------
    model_id: str
        The generative model's unique id
    init_py_path: str
        The path to the local model's __init__.py file needed for importing and running this model.
    package_path: str
        Path as string to the generative model's python package
    package_name: str
        Name of the model's python package i.e. the name of the model's zip file and unzipped package folder
    metadata_file_path: str
        Path as string to the generative model's metadata file e.g. default is relative path to package root.
    zenodo_model_uploader: str
        An instance of the `ZenodoModelUploader` class
    github_model_uploader: str
        An instance of the `GithubModelUploader` class.
    """

    def __init__(
        self,
        model_id: str,
        init_py_path: str,
    ):
        self.validate_model_id(model_id)
        self.model_id = model_id
        self.init_py_path = init_py_path
        self.validate_init_py_path(init_py_path)
        self.package_path = self.init_py_path.replace(INIT_PY_FILE, "")
        self.package_name = Path(self.package_path).name
        self.metadata_file_path = ""  # Default is relative path to package root.
        self.validate_local_model_import()
        self.zenodo_model_uploader = None
        self.github_model_uploader = None

    ############################ VALIDATION ############################

[docs]    def validate_model_id(
        self, model_id: str, max_chars: int = 30, min_chars: int = 13
    ) -> bool:
        """Asserts if the `model_id` is in the correct format and has a valid length

        Parameters
        ----------
        model_id: str
            The generative model's unique id
        max_chars: int
            the maximum of chars allowed in the model_id
        min_chars: int
            the minimum of chars allowed in the model_id

        Returns
        -------
        bool
            Returns flag indicating whether the `model_id` is correctly formatted.
        """

        num_chars = len(model_id)
        assert (
            num_chars <= max_chars
        ), f"The model_id {model_id} is too large ({num_chars}). Please reduce to a maximum of {max_chars} characters. Format Convention: '00001_GANTYPE_MODALITY'"
        assert (
            num_chars >= min_chars
        ), f"The model_id {model_id} is too small ({num_chars}). Please reduce to a minimum of {min_chars} characters. Format Convention: '00001_GANTYPE_MODALITY'"
        for i in range(5):
            assert model_id[
                i
            ].isdigit(), f"Your model_id's ({model_id}) character '{model_id[i]}' at position {i} is not a digit. The first 5 characters should be digits as in '00001_GANTYPE_MODALITY'. Please adjust."
        logging.info(
            f"The provided model_id is valid and will now be used to refer to the contributed model in medigan: {model_id}"
        )
        return True

[docs]    def validate_init_py_path(self, init_py_path) -> bool:
        """Asserts whether the `init_py_path` exists and points to a valid `__init__.py` correct file.

        Parameters
        ----------
        init_py_path: str
            The path to the local model's __init__.py file needed for importing and running this model.
        """

        assert (
            Path(init_py_path).exists() and Path(init_py_path).is_file()
        ), f"{self.model_id}: The path to your model's __init__.py function does not exist or does not point to a file. Please revise path {init_py_path}. Note: You can find an __init__.py example in https://github.com/RichardObi/medigan/tree/main/templates"
        assert Utils.is_file_in(
            folder_path=self.init_py_path.replace(f"/{INIT_PY_FILE}", ""),
            filename=INIT_PY_FILE,
        ), f"{self.model_id}: No __init__.py was found in your path {init_py_path}. Please revise. Note: You can find an __init__.py example in /templates in https://github.com/RichardObi/medigan"
        logging.info(
            f"The provided path to your model's __init__.py function was valid and points to a __init__.py file: {init_py_path}"
        )
        return True

[docs]    def validate_and_update_model_weights_path(self) -> dict:
        """Check if the model files can be found in the `package_path` or based on the `path_to_metadata`.

        Ideally, the user provided `package_path` and the `path_to_metadata` should both point to the same model package
        containing weights, config, license, etc. Here we check both of these paths to find the model weights.

        Returns
        -------
        dict
            Returns the metadata after updating the path to the model's checkpoint's weights
        """

        metadata_dir_path = Path(self.metadata_file_path).parent

        potential_weight_paths: list = []

        execution_metadata = self.metadata[self.model_id][CONFIG_FILE_KEY_EXECUTION]

        # package_path + package_path + file + extension
        try:
            potential_weight_paths.append(
                Path(
                    self.package_path
                    + f"/{execution_metadata[CONFIG_FILE_KEY_MODEL_NAME]}{execution_metadata[CONFIG_FILE_KEY_MODEL_EXTENSION]}"
                )
            )
        except KeyError as e:
            raise e

        # metadata_dir + package_path + file + extension
        try:
            potential_weight_paths.append(
                Path(
                    str(metadata_dir_path)
                    + f"/{execution_metadata[CONFIG_FILE_KEY_MODEL_NAME]}{execution_metadata[CONFIG_FILE_KEY_MODEL_EXTENSION]}"
                )
            )
        except KeyError as e:
            raise e

        # metadata_dir + package_path + file + extension
        try:
            potential_weight_paths.append(
                Path(
                    str(metadata_dir_path)
                    + "/"
                    + self.package_path
                    + f"/{execution_metadata[CONFIG_FILE_KEY_MODEL_NAME]}{execution_metadata[CONFIG_FILE_KEY_MODEL_EXTENSION]}"
                )
            )
        except KeyError as e:
            raise e

        for potential_weight_path in potential_weight_paths:
            if potential_weight_path.is_file():
                # Checking if there is a weights/checkpoint (model name + extension) file in the package /metadata path
                self.package_path = str(
                    Path(potential_weight_path).parent.resolve(strict=False)
                )  # strict=False, as models might be not on user's disc.
                self.metadata[self.model_id][CONFIG_FILE_KEY_EXECUTION][
                    CONFIG_FILE_KEY_PACKAGE_LINK
                ] = self.package_path
                logging.info(
                    f"The model weights path is valid and was added to the metadata of your model: {self.package_path}"
                )
                return self.metadata
        raise FileNotFoundError(
            f"{self.model_id}: Error validating metadata. There was no valid model weights file found. Please revise. Tested paths: '{potential_weight_paths}'"
        )

[docs]    def validate_local_model_import(self):
        """Check if the model package in the `package_path` can be imported as python library using importlib."""

        # Validation: Import module as python library to check if generate function is inside the
        # path_to_script_w_generate_function python file and no errors occur.
        try:
            sys.path.insert(1, str(self.package_path).replace(self.package_name, ""))
            importlib.import_module(name=self.package_name)
            logging.info(
                f"Model import test successful: The model was successfully imported using importlib: {self.package_name}"
            )
        except Exception as e:
            raise Exception(
                f"{self.model_id}: Error while testing importlib model import. Is your {INIT_PY_FILE} erroneous? "
                f"Please revise if the provided path ({self.init_py_path}) is valid and accessible and try again. "
                f"Exception: {e}"
            ) from e

    ############################ UPLOAD ############################

[docs]    def push_to_zenodo(
        self,
        access_token: str,
        creator_name: str,
        creator_affiliation: str,
        model_description: str = "",
    ):
        """Upload the model files as zip archive to a public Zenodo repository where the model will be persistently stored.

        Get your Zenodo access token here: https://zenodo.org/account/settings/applications/tokens/new/ (Enable scopes `deposit:actions` and `deposit:write`)

        Parameters
        ----------
        access_token: str
            a personal access token in Zenodo linked to a user account for authentication
        creator_name: str
            the creator name that will appear on the corresponding Zenodo model upload homepage
        creator_affiliation: str
            the creator affiliation that will appear on the corresponding Zenodo model upload homepage
        model_description: list
            the model_description that will appear on the corresponding Zenodo model upload homepage

        Returns
        -------
        str
            Returns the url pointing to the corresponding Zenodo model upload homepage
        """

        if self.zenodo_model_uploader is None:
            self.zenodo_model_uploader = ZenodoModelUploader(
                model_id=self.model_id, access_token=access_token
            )
        return self.zenodo_model_uploader.push(
            metadata=self.metadata,
            package_path=self.package_path,
            package_name=self.package_name,
            creator_name=creator_name,
            creator_affiliation=creator_affiliation,
            model_description=model_description,
        )

[docs]    def push_to_github(
        self,
        access_token: str,
        package_link: str = None,
        creator_name: str = "",
        creator_affiliation: str = "",
        model_description: str = "",
    ):
        """Upload the model's metadata inside a github issue to the medigan github repository.

        To add your model to medigan, your metadata will be reviewed on Github and added to medigan's official model metadata

        The medigan repository issues page: https://github.com/RichardObi/medigan/issues

        Get your Github access token here: https://github.com/settings/tokens

        Parameters
        ----------
        access_token: str
            a personal access token linked to your github user account, used as means of authentication
        package_link:
            a package link
        creator_name: str
            the creator name that will appear on the corresponding github issue
        creator_affiliation: str
            the creator affiliation that will appear on the corresponding github issue
        model_description: list
            the model_description that will appear on the corresponding github issue

        Returns
        -------
        str
            Returns the url pointing to the corresponding issue on github
        """

        if self.github_model_uploader is None:
            self.github_model_uploader = GithubModelUploader(
                model_id=self.model_id, access_token=access_token
            )

        return self.github_model_uploader.push(
            metadata=self.metadata,
            package_link=package_link,
            creator_name=creator_name,
            creator_affiliation=creator_affiliation,
            model_description=model_description,
        )

    ############################ METADATA ############################

[docs]    def load_metadata_template(self) -> dict:
        """Loads and parses (json to dict) a default medigan metadata template.

        Returns
        -------
        dict
            Returns the metadata template as dict
        """

        path_to_metadata_template = Path(
            f"{TEMPLATE_FOLDER}/{CONFIG_TEMPLATE_FILE_NAME_AND_EXTENSION}"
        )
        Utils.mkdirs(TEMPLATE_FOLDER)
        Utils.is_file_located_or_downloaded(
            download_link=CONFIG_TEMPLATE_FILE_URL,
            path_as_string=path_to_metadata_template,
        )
        metadata_template = Utils.read_in_json(path_as_string=path_to_metadata_template)
        if self.model_id is not None:
            # Replacing the placeholder id of template with model_id
            metadata_template[self.model_id] = metadata_template[
                list(metadata_template)[0]
            ]
            del metadata_template[list(metadata_template)[0]]
        return metadata_template

[docs]    def add_metadata_from_file(self, metadata_file_path) -> dict:
        """Read and parse the metadata of a local model, identified by `model_id`, from a metadata file in json format.

        Parameters
        ----------
        model_id: str
            The generative model's unique id
        metadata_file_path: str
            the path pointing to the metadata file

        Returns
        -------
        dict
            Returns a dict containing the contents of parsed metadata json file.
        """

        if Path(metadata_file_path).is_file():
            self.metadata = Utils.read_in_json(path_as_string=metadata_file_path)
            self.metadata_file_path = metadata_file_path
        elif Path(metadata_file_path + "/metadata.json").is_file():
            self.metadata = Utils.read_in_json(
                path_as_string=metadata_file_path + "/metadata.json"
            )
            self.metadata_file_path = metadata_file_path + "/metadata.json"
        else:
            raise FileNotFoundError(
                f"{self.model_id}: No metadata json file was found in the path you provided ({metadata_file_path}). "
                f"If you do not have a metadata file, create one using the add_metadata_from_input() function."
            )
        self.validate_and_update_model_weights_path()
        return self.metadata

[docs]    def add_metadata_from_input(
        self,
        model_weights_name: str = None,
        model_weights_extension: str = None,
        generate_method_name: str = None,
        dependencies: list = [],
        fill_more_fields_interactively: bool = True,
        output_path: str = "config",
    ):
        """Create a metadata dict for a local model, identified by `model_id`, given the necessary minimum metadata contents.

        Parameters
        ----------
        model_id: str
            The generative model's unique id
        model_weights_name: str
            the name of the checkpoint file containing the model's weights
        model_weights_extension: str
            the extension (e.g. .pt) of the checkpoint file containing the model's weights
        generate_method_name: str
            the name of the sample generation method inside the models __init__.py file
        dependencies: list
            the list of dependencies that need to be installed via pip to run the model.
        fill_more_fields_interactively: bool
            flag indicating whether a user will be interactively asked via command line for further input to fill out missing metadata content
        output_path: str
            the path where the created metadata json file will be stored.

        Returns
        -------
        dict
            Returns a dict containing the contents of the metadata json file.
        """

        # Get the metadata template to guide data structure and formatting of metadata.
        self.metadata_template = self.load_metadata_template()

        # Generate metadata with variables provided as parameters
        metadata = self.metadata_template[self.model_id][CONFIG_FILE_KEY_EXECUTION]
        metadata.update({CONFIG_FILE_KEY_PACKAGE_LINK: self.package_path})
        metadata.update({CONFIG_FILE_KEY_PACKAGE_NAME: self.package_name})
        metadata.update({CONFIG_FILE_KEY_MODEL_NAME: model_weights_name})
        metadata.update({CONFIG_FILE_KEY_MODEL_EXTENSION: model_weights_extension})
        metadata.update({CONFIG_FILE_KEY_DEPENDENCIES: dependencies})
        metadata[CONFIG_FILE_KEY_GENERATE][
            CONFIG_FILE_KEY_GENERATE_NAME
        ] = generate_method_name
        metadata_final = self.metadata_template
        metadata_final[self.model_id].update({CONFIG_FILE_KEY_EXECUTION: metadata})

        Utils.store_dict_as(
            dictionary=metadata_final,
            extension=".json",
            output_path=output_path,
            filename=self.model_id,
        )
        logging.info(
            f"{self.model_id}: Your model's metadata was stored in {output_path}."
        )

        if fill_more_fields_interactively:
            # Add more information to the metadata dict via user prompts
            metadata_final = self._recursively_fill_metadata(metadata=metadata_final)
            # Store again as additional fields should have now been filled
            Utils.store_dict_as(
                dictionary=metadata_final,
                extension=".json",
                output_path=output_path,
                filename=self.model_id,
            )
            logging.info(
                f"{self.model_id}: Your model's metadata was updated. Find it in {output_path}/{self.model_id}.json"
            )

        self.metadata = metadata_final
        self.validate_and_update_model_weights_path()

        return self.metadata

[docs]    def is_value_for_key_already_set(
        self, key: str, metadata: dict, nested_key
    ) -> bool:
        """Check if the value of a `key` in a `metadata` dictionary is already set and e.g. not an empty string, dict or list.

        Parameters
        ----------
        key: str
            The key in the currently traversed part of the model's metadata dictionary
        metadata: dict
            The currently traversed part of the model's metadata dictionary
        nested_key: str
            the `nested_key` indicates which subpart of the model's metadata we are currently traversing

        Returns
        -------
        bool
            Flag indicating whether a value exists for the `key` in the dict
        """

        if (
            metadata.get(key) is None
            or metadata.get(key) == ""
            or (isinstance(metadata.get(key), list) and not metadata.get(key))
            or isinstance(metadata.get(key), dict)
        ):
            # Note: If metadata.get(key) is referencing a dict, we always want to go inside the dict and add values.
            return False
        else:
            logging.debug(
                f"{self.model_id}: Key value pair ({key}:{metadata.get(key)}) already exists in metadata for key "
                f"'{nested_key}'. Not prompting user to insert value for this key."
            )
            return True

    def _recursively_fill_metadata(
        self, metadata_template: dict = None, metadata: dict = {}, nested_key: str = ""
    ) -> dict:
        """Filling a model metadata template with values retrieved via user input prompts and by traversing nested dicts and list recursively.

        Parameters
        ----------
        metadata_template: dict
            The template containing all keys expected in a model's metadata dictionary.
        metadata: dict
            The currently traversed part of the model's metadata dictionary
        nested_key: str
            the `nested_key` indicates which subpart of the model's metadata we are currently traversing

        Returns
        -------
        dict
            The final fully filled metadata dictionary.
        """

        if metadata_template is None:
            metadata_template = self.metadata_template
        # Prompt user for optional metadata input
        retrieved_nested_key = nested_key
        for key in metadata_template:
            # nested_key to know where we are inside the metadata dict.
            nested_key = (
                key if retrieved_nested_key == "" else f"{retrieved_nested_key}.{key}"
            )
            if not self.is_value_for_key_already_set(
                key=key, metadata=metadata, nested_key=nested_key
            ):
                value_template = metadata_template.get(key)
                if value_template is None:
                    input_value = input(
                        f"{self.model_id}: Please enter value of type float or int for your model for key '{nested_key}': "
                    )
                    try:
                        value_assigned = float(input_value.replace(",", "."))
                    except ValueError:
                        value_assigned = (
                            int(input_value) if input_value.isdigit() else None
                        )
                elif isinstance(value_template, list):
                    input_value = input(
                        f"{self.model_id}: Please enter a comma-separated list of values for your model for key: '{nested_key}': "
                    )
                    value_assigned = (
                        [value.strip() for value in input_value.split(",")]
                        if input_value != ""
                        else []
                    )
                elif isinstance(value_template, str):
                    value_assigned = str(
                        input(
                            f"{self.model_id}: Please enter value of type string for your model for key '{nested_key}': "
                        )
                    )
                elif isinstance(value_template, dict):
                    if len(value_template) == 0:
                        # If dict is empty, no recursion. Instead, we ask the user directly for input.
                        iterations = int(
                            input(
                                f"{self.model_id}: How many key-value pairs do you want to nest below key '{nested_key}' "
                                f"in your model's metadata. Type a number: "
                            )
                            or "0"
                        )
                        nested_metadata: dict = {}
                        for i in range(iterations):
                            nested_key_input = str(
                                input(f"{self.model_id}: Enter key {i + 1}: ")
                            )
                            nested_value_input = input(
                                f"{self.model_id}: For key{i + 1}={nested_key_input}, enter value: "
                            )
                            nested_metadata.update(
                                {nested_key_input: nested_value_input}
                            )
                        value_assigned = nested_metadata
                    else:
                        # From metadata, get the nested dict below the key. If metadata has no nested dict, get the
                        # template's nested dict instead, which is stored in value_template
                        temp_metadata = (
                            metadata.get(key)
                            if metadata.get(key) is not None
                            else value_template
                        )
                        # Filling nested dicts via recursion. value_assigned is of type dict in this case.
                        value_assigned = self._recursively_fill_metadata(
                            metadata_template=value_template,
                            nested_key=nested_key,
                            metadata=temp_metadata,
                        )
                logging.debug(
                    f"{self.model_id}: You provided this key-value pair: {key}={value_assigned}"
                )
                metadata.update({key: value_assigned})
        return metadata

    def __repr__(self):
        return f"ModelContributor(model_id={self.model_id}, metadata={self.metadata})"

    def __len__(self):
        raise NotImplementedError

    def __getitem__(self, idx: int):
        raise NotImplementedError