packagist: Continue listing when github server hangs up

This applies with or without retry (retry is for a future version of swh.core).

This skips the origin when the hang-up sporadically happens. The origin should get
picked up by a later listing eventually.

The listing currently fails to finish when the github server hangs up on the
process. Adding this behavior allows skipping the issue without breaking the listing.
This commit is contained in:
Antoine R. Dumont (@ardumont) 2023-08-02 17:30:00 +02:00
parent 203f6db8f0
commit f236f3d163
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8

View file

@ -6,11 +6,12 @@
from dataclasses import dataclass
from datetime import datetime, timezone
import logging
from typing import Any, Dict, Iterator, List, Optional
from random import shuffle
from typing import Any, Dict, Iterator, List, Optional
import iso8601
import requests
from tenacity import RetryError
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@ -248,9 +249,14 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
# Non-github urls will be returned as is, github ones will be canonical
# ones
assert self.github_session is not None
origin_url = (
self.github_session.get_canonical_url(origin_url) or origin_url
)
try:
origin_url = (
self.github_session.get_canonical_url(origin_url) or origin_url
)
except (requests.exceptions.ConnectionError, RetryError):
# the server hung up on us: skip this origin for now,
# the failure may not happen again in a later listing
continue
# bitbucket closed its mercurial hosting service, those origins can not be
# loaded into the archive anymore