lister: Make sure listers that require github tokens can use them

While deploying the nixguix lister, I realized that even though the credentials
configuration is properly set for all listers, the listers that actually require github
origin canonicalization do not have access to the github credentials. These are lost in
the constructor, which only keeps the lister's own credentials. This currently
translates to those listers being rate-limited.

This commit fixes it by moving the self.github_session instantiation into the constructor
when the lister explicitly requires the github session, hence lifting the rate limit
for the maven, packagist, nixguix, and github listers.

Related to infra/sysadm-environment#4655
This commit is contained in:
Antoine R. Dumont (@ardumont) 2022-10-26 17:23:40 +02:00
parent 81688ca17e
commit 92d494261f
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
6 changed files with 20 additions and 28 deletions

View file

@ -11,7 +11,7 @@ from urllib.parse import parse_qs, urlparse
import iso8601
from swh.core.github.utils import GitHubSession, MissingRateLimitReset
from swh.core.github.utils import MissingRateLimitReset
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@ -78,6 +78,7 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
credentials=credentials,
url=self.API_URL,
instance="github",
with_github_session=True,
)
self.first_id = first_id
@ -85,11 +86,6 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
self.relisting = self.first_id is not None or self.last_id is not None
self.github_session = GitHubSession(
credentials=self.credentials,
user_agent=str(self.session.headers["User-Agent"]),
)
def state_from_dict(self, d: Dict[str, Any]) -> GitHubListerState:
return GitHubListerState(**d)
@ -109,6 +105,7 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
logger.debug("Getting page %s", current_url)
try:
assert self.github_session is not None
response = self.github_session.request(current_url)
except MissingRateLimitReset:
# Give up

View file

@ -141,7 +141,7 @@ def test_anonymous_ratelimit(swh_scheduler, caplog, requests_ratelimited) -> Non
caplog.set_level(logging.DEBUG, "swh.core.github.utils")
lister = GitHubLister(scheduler=swh_scheduler)
assert lister.github_session.anonymous
assert lister.github_session is not None and lister.github_session.anonymous
assert "using anonymous mode" in caplog.records[-1].message
caplog.clear()