Deduplicate origins in the GitHub lister
In some circumstances, GitHub returns two separate repos with the same `html_url` on the same page, which makes the lister fail with a cardinality error.
This commit is contained in:
parent
8991c625ea
commit
5f567b3c34
1 changed file with 7 additions and 1 deletion
|
@@ -8,7 +8,7 @@ import datetime
|
|||
import logging
|
||||
import random
|
||||
import time
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from typing import Any, Dict, Iterator, List, Optional, Set
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import iso8601
|
||||
|
@@ -305,11 +305,17 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
|
|||
"""
|
||||
assert self.lister_obj.id is not None
|
||||
|
||||
seen_in_page: Set[str] = set()
|
||||
|
||||
for repo in page:
|
||||
if not repo:
|
||||
# null repositories in listings happen sometimes...
|
||||
continue
|
||||
|
||||
if repo["html_url"] in seen_in_page:
|
||||
continue
|
||||
seen_in_page.add(repo["html_url"])
|
||||
|
||||
pushed_at_str = repo.get("pushed_at")
|
||||
pushed_at: Optional[datetime.datetime] = None
|
||||
if pushed_at_str:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue