# -*- coding: utf-8 -*-
# ! /usr/bin/env python
"""Zenodo Model uploader class that uploads models to medigan associated data storage on Zenodo. """
from __future__ import absolute_import
import json
import logging
import shutil
from pathlib import Path
import requests
from ..constants import (
CONFIG_FILE_KEY_DESCRIPTION,
CONFIG_FILE_KEY_SELECTION,
CONFIG_FILE_KEY_TAGS,
ZENODO_API_URL,
ZENODO_GENERIC_MODEL_DESCRIPTION,
ZENODO_HEADERS,
ZENODO_LINE_BREAK,
)
from .base_model_uploader import BaseModelUploader
class ZenodoModelUploader(BaseModelUploader):
    """`ZenodoModelUploader` class: Uploads a user's model via API to Zenodo, where it is permanently stored with DOI.

    Parameters
    ----------
    model_id: str
        The generative model's unique id
    access_token: str
        a personal access token in Zenodo linked to a user account for authentication

    Attributes
    ----------
    model_id: str
        The generative model's unique id
    params: dict
        Query parameters (``{"access_token": access_token}``) that are sent with every Zenodo API request
    """

    def __init__(
        self,
        model_id,
        access_token,
    ):
        self.model_id = model_id
        # The token is not stored on its own; it only lives inside the query parameters.
        self.params = {"access_token": access_token}

    ############################ HELPERS ############################

    @staticmethod
    def _response_details(response):
        """Return the decoded JSON body of `response`, or its raw text if the body is not valid JSON.

        Used when building error messages: a failed request may carry a non-JSON body, and decoding it
        blindly would raise a decode error that hides the HTTP status that actually went wrong.
        """
        try:
            return response.json()
        except ValueError:
            # JSON decode errors raised by `Response.json()` derive from ValueError.
            return response.text

    ############################ UPLOAD ############################

    def create_upload_description(
        self, metadata: dict, model_description: str = ""
    ) -> str:
        """Create a string containing the textual description that will accompany the uploaded model files.

        The string contains tags and a text retrieved from the description subsection of the model metadata.
        Both parts are optional: if they cannot be read from `metadata`, they are left out of the description.

        Parameters
        ----------
        metadata: dict
            The model's corresponding metadata
        model_description: str
            the model_description that will appear on the corresponding Zenodo model upload homepage

        Returns
        -------
        str
            Returns the textual description of the model upload
        """
        # Best effort: missing or malformed metadata must not abort the upload.
        try:
            tags = f"{ZENODO_LINE_BREAK} <p><strong>Tags:</strong></p> {metadata[self.model_id][CONFIG_FILE_KEY_SELECTION][CONFIG_FILE_KEY_TAGS]}"
        except (LookupError, TypeError):
            tags = ""

        # ValueError additionally covers json.dumps failures (e.g. circular references).
        try:
            description_from_config = f"<p><strong> Description from model config:</strong></p>: {json.dumps(metadata[self.model_id][CONFIG_FILE_KEY_DESCRIPTION])}"
        except (LookupError, TypeError, ValueError):
            description_from_config = ""

        return f"{model_description} <p><strong>Model ID:</strong></p> {self.model_id}. {ZENODO_LINE_BREAK} <p><strong>Uploaded via:</strong></p> API {tags} {ZENODO_LINE_BREAK} {ZENODO_GENERIC_MODEL_DESCRIPTION.replace('YOUR_MODEL_ID', self.model_id)} {description_from_config} {ZENODO_LINE_BREAK}"

    def create_upload_json_data(
        self, creator_name: str, creator_affiliation: str, description: str = ""
    ) -> dict:
        """Create some descriptive data in dict format to be uploaded and stored alongside the model files.

        Parameters
        ----------
        creator_name: str
            the creator name that will appear on the corresponding Zenodo model upload homepage
        creator_affiliation: str
            the creator affiliation that will appear on the corresponding Zenodo model upload homepage
        description: str
            the model_description that will appear on the corresponding Zenodo model upload homepage

        Returns
        -------
        dict
            Returns the descriptive data in dictionary structure describing the model upload.
        """
        return {
            "metadata": {
                "title": f"MEDIGAN MODEL UPLOAD: {self.model_id}",
                "upload_type": "software",
                "description": description,
                "creators": [
                    {
                        "name": f"{creator_name}",
                        "affiliation": f"{creator_affiliation}",
                    }
                ],
            }
        }

    def locate_or_create_model_zip_file(
        self, package_path: str, package_name: str
    ) -> tuple:
        """If not possible to locate, create a zipped python package of the model.

        Parameters
        ----------
        package_path: str
            Path (string or path-like) to the generative model's python package containing an `__init__.py` file,
            or to an already existing zip archive of that package
        package_name: str
            Name of the model's python package i.e. the name of the model's zip file and unzipped package folder

        Returns
        -------
        tuple
            Returns a tuple containing two strings: The `filename` and the `file_path` of and to the zipped python package
        """
        # Normalise once so that pathlib.Path inputs are handled like plain strings.
        package_path_str = str(package_path)

        # Check if zip file already exists
        if Path(package_path).is_file() and package_path_str.endswith(".zip"):
            file_path = package_path_str
        else:
            # Create a zip archive containing the model package and store that zip file next to
            # the folder of the model package
            package_parent_path = str(Path(package_path).parent)
            base_name = package_parent_path + "/" + package_name
            logging.info(
                f"Archiving the model package as zip archive: base_name={base_name}, root_dir={package_path_str}/ "
            )
            # make_archive returns the path of the archive it created.
            file_path = shutil.make_archive(
                base_name=base_name,
                format="zip",
                root_dir=package_path,
            )
        filename = Path(file_path).name

        logging.info(
            f"Model was successfully archived as zip archive: filename={filename}, file_path={file_path} "
        )
        return filename, file_path

    def empty_upload(self) -> requests.Response:
        """Upload an empty placeholder entry to Zenodo as is required to retrieve a `deposition_id` and `bucket_url`.

        deposition_id and bucket_url are needed for file upload and publishing in the subsequent upload steps.

        Returns
        -------
        requests.Response
            Returns the response retrieved via the Zenodo API

        Raises
        ------
        Exception
            If the API does not answer with status code 201
        """
        r = requests.post(
            ZENODO_API_URL,
            params=self.params,
            json={},
            headers=ZENODO_HEADERS,
        )
        if r.status_code != 201:
            raise Exception(
                f"{self.model_id}: Error ({r.status_code}!=201) during Zenodo ('{ZENODO_API_URL}') upload (step 1: creating empty upload template): {self._response_details(r)}."
            )
        return r

    def upload(self, file_path: str, filename: str, bucket_url: str) -> requests.Response:
        """Upload a file to Zenodo entry of the uploaded model files.

        Parameters
        ----------
        file_path: str
            The path of the file that is uploaded to Zenodo
        filename: str
            The name of the file that is uploaded to Zenodo
        bucket_url: str
            The bucket url used in the PUT request to upload the data file.

        Returns
        -------
        requests.Response
            Returns the response retrieved via the Zenodo API

        Raises
        ------
        Exception
            If the API does not answer with status code 200
        """
        # The open file object is handed to requests as the request body.
        with open(file_path, "rb") as fp:
            r = requests.put(
                "%s/%s" % (bucket_url, filename),
                data=fp,
                params=self.params,
            )
        if r.status_code != 200:
            raise Exception(
                f"{self.model_id}: Error ({r.status_code}!=200) during Zenodo ('{bucket_url}') upload (step 2: uploading model as zip file): {self._response_details(r)}"
            )
        return r

    def upload_descriptive_data(self, deposition_id: str, data: dict) -> requests.Response:
        """Upload textual descriptive data to be associated and added to the Zenodo entry of the uploaded model files.

        Parameters
        ----------
        deposition_id: str
            The deposition id assigned by Zenodo to the uploaded model file
        data: dict
            The descriptive information that will be uploaded to Zenodo and associated with the deposition_id

        Returns
        -------
        requests.Response
            Returns the response retrieved via the Zenodo API

        Raises
        ------
        Exception
            If the API does not answer with status code 200
        """
        deposition_url = f"{ZENODO_API_URL}/{deposition_id}"
        r = requests.put(
            deposition_url,
            params=self.params,
            data=json.dumps(data),
            headers=ZENODO_HEADERS,
        )
        if r.status_code != 200:
            raise Exception(
                f"{self.model_id}: Error ({r.status_code}!=200) during Zenodo ('{deposition_url}') upload (step 3: updating metadata): {self._response_details(r)}"
            )
        return r

    def publish(self, deposition_id: str) -> requests.Response:
        """Publish a zenodo upload.

        This makes the upload official, as it will then be publicly accessible and persistently stored on Zenodo with associated DOI.
        The user is asked on the console to confirm by typing 'Yes' before anything is published.

        Parameters
        ----------
        deposition_id: str
            The deposition id assigned by Zenodo to the uploaded model file

        Returns
        -------
        requests.Response
            Returns the response retrieved via the Zenodo API

        Raises
        ------
        Exception
            If the user does not confirm with 'Yes', or if the API does not answer with status code 202
        """
        publish_url = f"{ZENODO_API_URL}/{deposition_id}/actions/publish"

        # Get explicit user approval to publish on Zenodo. Published files cannot be deleted.
        is_user_sure = str(
            input(
                f"You are about to publish model {self.model_id} with Zenodo-ID {deposition_id} permanently on {ZENODO_API_URL.replace('/api/deposit/depositions','')}. To proceed, type 'Yes': "
            )
        )
        if is_user_sure != "Yes":
            raise Exception(
                f"{self.model_id}: Error during Zenodo ('{publish_url}') upload (step 4: publishing uploaded model) due to user opt-out: You typed '{is_user_sure}' instead of 'Yes'. Model was not published. Try again. Your Zenodo deposition ID (if retrieved): '{deposition_id}'."
            )

        r = requests.post(
            publish_url,
            params=self.params,
        )
        if r.status_code != 202:
            raise Exception(
                f"{self.model_id}: Error ({r.status_code}!=202) during Zenodo ('{publish_url}') upload (step 4: publishing uploaded model): {self._response_details(r)}"
            )

        # Decode the body once and reuse it for both log messages.
        published = r.json()
        logging.info(
            f"{self.model_id}: Successfully pushed model to Zenodo with DOI '{published['doi']}': '{published['links']['record_html']}"
        )
        logging.debug(
            f"{self.model_id}: Full Zenodo API response after successful publishing of model: {published}"
        )
        return r

    def push(
        self,
        metadata: dict,
        package_path: str,
        package_name: str,
        creator_name: str,
        creator_affiliation: str,
        model_description: str = "",
    ) -> str:
        """Upload the model files as zip archive to a public Zenodo repository where the model will be persistently stored.

        Get your Zenodo access token here: https://zenodo.org/account/settings/applications/tokens/new/ (Enable scopes `deposit:actions` and `deposit:write`)

        Parameters
        ----------
        metadata: dict
            The model's corresponding metadata
        package_path: str
            The path to the packaged model files
        package_name: str
            The name of the packaged model files
        creator_name: str
            the creator name that will appear on the corresponding Zenodo model upload homepage
        creator_affiliation: str
            the creator affiliation that will appear on the corresponding Zenodo model upload homepage
        model_description: str
            the model_description that will appear on the corresponding Zenodo model upload homepage

        Returns
        -------
        str
            Returns the url pointing to the corresponding Zenodo model upload homepage
        """
        # Check if zip file exists, else create new one for upload.
        filename, file_path = self.locate_or_create_model_zip_file(
            package_path=package_path, package_name=package_name
        )

        # create empty upload to Zenodo to get deposition_id and bucket_url
        response = self.empty_upload()
        logging.debug(f"API Response after creating empty upload template: {response}")
        deposition = response.json()

        # Get the deposition id from the response
        deposition_id = deposition["id"]

        # Using bucket as defined by Zenodo API for zip file model upload
        bucket_url = deposition["links"]["bucket"]

        logging.info(
            f"Starting Zenodo upload of model with deposition_id {deposition_id} to {bucket_url}"
        )
        response = self.upload(
            file_path=file_path,
            filename=filename,
            bucket_url=bucket_url,
        )
        logging.debug(
            f"API Response after uploading model to '{bucket_url}': {response}"
        )

        # get the model description i.e. model type, metadata info, etc.
        description = self.create_upload_description(
            metadata=metadata, model_description=model_description
        )

        # get the data that includes description, but also creator information
        data = self.create_upload_json_data(
            description=description,
            creator_name=creator_name,
            creator_affiliation=creator_affiliation,
        )

        # attach the descriptive data to the deposition that already holds the zip file
        response = self.upload_descriptive_data(deposition_id=deposition_id, data=data)
        logging.debug(
            f"API Response after uploading descriptive model data: {response}"
        )

        # publish to Zenodo. Model will get DOI after this step and become part of Zenodo's permanent record.
        response = self.publish(deposition_id=deposition_id)
        logging.debug(
            f"API Response after publishing the deposition {deposition_id} on Zenodo: {response}"
        )
        return response.json()["links"]["record_html"]  # zenodo_record_url

    def __repr__(self):
        return f"ZenodoModelUploader(model_id={self.model_id}, zenodo_url={ZENODO_API_URL})"

    # NOTE: unless a base class defines `__bool__`, truth-testing an instance (`if uploader:`)
    # falls back to `__len__` and therefore raises NotImplementedError as well.
    def __len__(self):
        raise NotImplementedError

    def __getitem__(self, idx: int):
        raise NotImplementedError