Skip to content

Commit

Permalink
Fast fail for unknown urls
Browse files (browse the repository at this point in the history)
  • Loading branch information
J535D165 committed Aug 20, 2023
1 parent cee97b9 commit fcba102
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 8 deletions.
20 changes: 15 additions & 5 deletions datahugger/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@
from datahugger.exceptions import DOIError


def _red(s: object) -> str:
    """Return *s* wrapped in ANSI escape codes so terminals render it in red.

    Args:
        s: Value to colourise; it is formatted with ``str()`` semantics via
            an f-string, so any object is accepted.

    Returns:
        The formatted text prefixed with ``ESC[31m`` and followed by the
        reset sequence ``ESC[0m``.
    """
    # The trailing ESC[0m resets attributes so later output is not coloured.
    return f"\u001b[31m{s}\u001b[0m"
def print_red(s: object) -> None:
    """Print *s* to standard output wrapped in ANSI red escape codes.

    Args:
        s: Value to print; it is formatted with ``str()`` semantics via an
            f-string, so any object is accepted.
    """
    # ESC[31m switches the foreground to red; ESC[0m resets attributes so
    # subsequent output is not coloured.
    print(f"\u001b[31m{s}\u001b[0m")


def print_green(s: object) -> None:
    """Print *s* to standard output wrapped in ANSI green escape codes.

    Args:
        s: Value to print; it is formatted with ``str()`` semantics via an
            f-string, so any object is accepted.
    """
    # ESC[32m switches the foreground to green; ESC[0m resets attributes so
    # subsequent output is not coloured.
    print(f"\u001b[32m{s}\u001b[0m")


def main():
Expand Down Expand Up @@ -94,15 +98,21 @@ def main():
if logging.DEBUG == logging.root.level:
raise doi_err
else:
print(_red(f"DOI Error: {doi_err}"))
print_red(f"Error: {doi_err}")
print("")
print("Check if your DOI is correct at doi.org. Is the DOI valid?")
print("Please request support in the issue tracker:")
print("")
print("\thttps://github.com/J535D165/datahugger/issues/new/choose")
print()
exit(1)

except Exception as err:
# raise error when log level is DEBUG
if logging.DEBUG == logging.root.level:
raise err
else:
print(_red(f"Error: {err}"))
print_red(f"Error: {err}")
print("")
print("Do you think this is a data repository that needs to be supported?")
print("Please request support in the issue tracker:")
Expand All @@ -112,7 +122,7 @@ def main():
exit(1)

if args.progress:
print("\u001b[32mDataset successfully downloaded.\u001b[0m")
print_green("Dataset successfully downloaded.")


if __name__ == "__main__":
Expand Down
10 changes: 7 additions & 3 deletions datahugger/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,7 @@ def _base_request(
if uri.hostname in URL_RESOLVE:
r = requests.head(url, allow_redirects=True)
if r.status_code == 404 and r.url and r.url.startswith("https://doi.org"):
raise DOIError(
f"DOI cannot be found in the DOI System, see https://doi.org/{doi}"
)
raise DOIError(f"DOI {doi} not found in the DOI system")
elif r.status_code == 405:
# head request not allowed, try get request
r = requests.get(url, allow_redirects=True)
Expand Down Expand Up @@ -326,6 +324,9 @@ def _resolve_service(url, doi):
def _resolve_service_from_netloc(url):
uri = urlparse(url)

if not uri.hostname:
return None

logging.info(f"Resolve service for netloc '{uri.hostname}'")
if uri.hostname in SERVICES_NETLOC.keys():
return SERVICES_NETLOC[uri.hostname]
Expand All @@ -338,6 +339,9 @@ def _resolve_service_from_netloc(url):


def _resolve_service_with_re3data(doi):
if not doi:
return None

logging.info("Resolve service with datacite and re3data")
publisher = get_datapublisher_from_doi(doi)
logging.info(f"Datacite publisher of dataset: {publisher}")
Expand Down

0 comments on commit fcba102

Please sign in to comment.