From aafaebd5debce46b6fe085a5660e0fb5d4774373 Mon Sep 17 00:00:00 2001 From: Antoine Lambert Date: Wed, 21 Aug 2024 15:17:28 +0200 Subject: [PATCH] crates: Use looseversion.LooseVersion2 to parse crate versions packaging.version.parse is dedicated to parse Python package version numbers but crate versions do not necessarily respect Python version number conventions and thus some crate versions cannot be parsed. Prefer to use looseversion.LooseVersion2 instead which in a drop-in replacement for deprecated distutils.version.LooseVersion and enables to parse all kind of version numbers. --- .pre-commit-config.yaml | 1 + requirements.txt | 1 + swh/lister/crates/lister.py | 4 ++-- .../tests/data/fake_crates_repository_init.sh | 1 + .../https_static.crates.io/db-dump.tar.gz | Bin 1358 -> 1420 bytes .../db-dump.tar.gz_visit1 | Bin 1534 -> 1591 bytes swh/lister/crates/tests/test_lister.py | 13 +++++++++++++ 7 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ce247f1..5e13001 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,6 +32,7 @@ repos: stages: [commit] - id: codespell name: Check commit message spelling + args: [-L crate] stages: [commit-msg] - repo: local diff --git a/requirements.txt b/requirements.txt index e8bf14c..dead79e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ dateparser dulwich iso8601 launchpadlib +looseversion lxml psycopg2 pyreadr diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py index a17ce7d..e31756c 100644 --- a/swh/lister/crates/lister.py +++ b/swh/lister/crates/lister.py @@ -15,7 +15,7 @@ from typing import Any, Dict, Iterator, List, Optional from urllib.parse import urlparse import iso8601 -from packaging.version import parse as parse_version +from looseversion import LooseVersion2 from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin @@ -201,7 +201,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]): for name, item in dataset.items(): page = [] # sort crate versions - versions: list = sorted(item["versions"].keys(), key=parse_version) + versions = sorted(item["versions"].keys(), key=LooseVersion2) for version in versions: v = item["versions"][version] diff --git a/swh/lister/crates/tests/data/fake_crates_repository_init.sh b/swh/lister/crates/tests/data/fake_crates_repository_init.sh index b58d195..8078fad 100755 --- a/swh/lister/crates/tests/data/fake_crates_repository_init.sh +++ b/swh/lister/crates/tests/data/fake_crates_repository_init.sh @@ -17,6 +17,7 @@ echo -e '''created_at,description,documentation,downloads,homepage,id,max_upload ''' > data/crates.csv echo -e '''checksum,crate_id,crate_size,created_at,downloads,features,id,license,links,num,published_by,updated_at,yanked +d879626d5babe4ca6c4ec953d712e28d939672b325a4f9352f28ca3c82568a15,1339,,2014-12-18 06:56:46.88489,845,{},1321,MIT/Apache-2.0,,0.1.3-experimental,,2017-11-30 05:24:37.146115,f 398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944,2233,,2015-05-27 23:19:16.848643,1961,{},10855,MIT/Apache-2.0,,0.1.0,,2017-11-30 03:37:17.449539,f 343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9,545,,2014-12-18 06:56:46.88489,845,{},1321,MIT/Apache-2.0,,0.1.2,,2017-11-30 02:29:20.01125,f 6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7,1339,,2015-02-03 11:15:19.001762,8211,{},4371,MIT/Apache-2.0,,0.1.2,,2017-11-30 03:14:27.545115,f diff --git a/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz b/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz index bd74c75b0fffe577720c990446d756eed21118f8..12b3082d29757e19fbe127261bb86bec5cce6b1c 100644 GIT binary patch literal 1420 zcmV;71#|iziwFP!000001MOH#Z`(!?_F2Dz@R_1_X7Ds=2xP<`4{{)I5Sw;=}PBu0U#yLJ87%B>)W2 zRi(bYYFAgMaq`Eiy7K+b??XOLPsn}l`F|A64GP4qu*(O9K=a}r@#Xd>C2+O>zky5k z-{;9EKl$d@ap+%QkJ-Rl$|vm)+B~wq=D({-yujX{#sA#?e{QPkdg}I>Y1kjG+v@rs ze|ROZ6{1DcXsN&f|NrH>o+rekpYnLy4YzUF@4IQ9M_CmSWI|6CeH2cCb+#l_rf5fo zWcDd%yx#n^*?jL~uXw%;a4~j&WWLONoo@Y9r{VX0H~2Kxh3}n?d8}D> z*TvkAnaBS1Sogfl;S_e=c+Zo<+xm1!@8-Yt{p*}I$r`6oCl!3iGI||lj!scBfEjIa z%^)7mFd(KmGXX z=7)plBW|$~buAibXq(?0@{tQ(Kl$CVlGy@mNmN3$C|fcO`y%jGWkb$7g&vEJ2@+z6 zV2mtDYqG*-!dOWAlmuGArA@_@tn{{&f+;vDYeb@D549C)i>5+q!L`8JN~t2 zg2<2n!8=Sc7W0q6nL^G}Q~|W0n8jw!A{uLpuuv$;JWE5ifSh|8KKI$mDVKmKq7;ff z5`t4D+aRLzIva~Ad2MBckc!n#rh*O(F^|?0lbGodgRTP27H%aphK#`)q_bxVHC^_} zPbg#)xCV*0GpwMrUaBBHtx44NG+4+5!mN!a1c}E zNOy^Qdv^Mex!A;%y+);?dc+}Bh%JZ~m>hJ{$(m5eg+vhIIdOumg4xQ*R#m_ndSOB? z&gqcIdP9+YjEyH%!U$|&8zgvdI2%cqvE)=rX2&f?#>9@PA8Dv66fHq3ZKDhyQP-A( zWl_-B=TXQX4iu0Cf>^=t(-S{g|IaP{3snEl)_+hEAHV-C{oh*uzdb%I@p+8L?l5uJ z$K^hrmJ<2!?OpM|ANKj+@A6#y_x{c8=`i0vcMHAW=bD#G%WxRRZW@jsHUXQ98Auvq zslNW%_i5Ny+(R(Q)m`p0JK%5}E52V1(>>ojE+Ty%$EuwAIIHG&-SoD(uJ5PmFt#^0 ztoPU)$D3J{)1Gt0NtfO~KC-p9=Vh9^Zn{5(rW}Xhb^SwMb^F6EzbCgEN_EV4ryYACw-D|^}Aiw z^@r0mULr2vw0A4}Bs( z$7?^bn1;|dhq38e7Q1lVr*`zW-+pU%osR?CcKdYjo5Y&P_Wr~5@i1M#9-7}1YyCbk zdR&}`LpL;I*MDTkL%dz#63Md^WNR)8t}8GV$P%P6R==S$5PY*w(4X!~J+*ZO=(v(~%XYI62xdyr= z3d9O{1yBku1OQvTX^Uom*rhvhi!K*^+8lQjh3Uhgr`^$2+F{ohZ)wr8c(-kahepRC z(Uhib##H#@*zGB0XQehrPxQt_w|7mO=%Hh?PX*l%+os(VWb1dkqG=DuaX2Sj+N2N6 zDvjJZ{iqT<-)?T_Dyc}f`Qvk@pL8>8t_rXf=amqp0kl@#hKE|CDNt14D$u1B4#1FZ zXGAIvz8_Nm*e=$?N0N&+BhfiyLEv)c;pO*rsqZ9QSPQ_t#X`Pao!re!n@~wSY|5j+B8@3)nRw zwL_w@y%`wQZgx1lAv-Iz)(wbpXoD9JLmf+axe#p?!=T zEsrUw5Tlje=A1FsPRiQ^JzFWH)lx8U8rY}L47gQ_{ruCrt2YNvyIf<*8RI2P1KFE3 zz*_Kvi>Y{nDW!CZT0p)OQq++qKuWZj)Qam^>nwwo3RO%x8q7S|5R?$v3ZJ|)StKK^ z4Rv;>ig?!q?9}U&t|U<;j*jY)nh6Sov;6Q&epsQake3_)717fP(WNLHQen&9i2%`P z4mN@_wMkGLiHa+Mwa>LMHBS%$!8?pHWOLi;?)74T3KXeWPI;-3S%g_aDa^ANk_Tjk zrlfrXOcZ0vH6jm^D;Yf0PUU249-P<7Sd7tYD+AOqTkT}b=)efl(Rx85ChBWkmTBL{!?B2bKx^Q4LxfE;XV3ErC&Xt{(r#heobZZCL z15im@Dnnb;%u=$<1dV;3g!H0BnOlh(I3?u*MVppMy3J z`oEn1E?D~iE8x>*QLLt)2qeS)u&RpHU-9sca-I(8l=Ia2zyZDgvSO3L>Cb`drn{!o z@V*%*JCRi8wT$S6MIX3RV4ckomC4!(BP9Jaq?Wq6{Jdo5f&~i}ELgB$!GZ+~7A#n> QV8N^54*=fb>i{SK01MuucK`qY diff --git a/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz_visit1 b/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz_visit1 index 0b7dd384a90ce172fa758391869b2d998bfd8b5c..7164da399f78a0a27ead124833eaf7ec7b30d344 100644 GIT binary patch literal 1591 zcmV-72FUpziwFP!000001MOH#kK0BPp0j=hF=vX?Rn<@4a@<1@!0>4=gJwT8%t)j_ zQdxV0{P!(VV|zT-#A9UQAXyd!h$8#(^;fTts&20SbQ5ewsreg>5|6(>*cC{vQ7Azg zuLOVrx+>J45AEveG){gfiYwpk{h>+2^vt-gJ^!0pb%O$NE8JG7tw15rym~}@yZ=e? zt=0eE!7lm#kR~7fEv zC?%#Oth6#E6=mZCU2lHdZ2szFul8V<@_)|#*K7jcUjL;8SnL1yuxI`MOB%+u@5U;O zzx=^HzEb}Q1U;?)#?V^-zlVjp6z<2$PF&UXr;F~bL(rIlqDk3vWJ?}3r&GkgR6nj zN~sJHr5_^-?ub2lO_OCH@^7;A+_b1qv!PJXK|SG+3d9;j4NML?>10VL42bpc9E5o zQ6>nLB1X;@OU!P)qmf^9!tD5hM_%9tom8* zh0`LD9)5ige|P;M9sMrN;_u+!-JXu~{j*x^{2`URTwK16{n$?Z@K^+HE_NVkjHUAW zjqhTAC|E->$;B>pi36}7#)9t`{dC7Shh<2Y#<9q!F3iyUzMbAz*X7+b9mo3ShH;P8 zFy72Vx$QYe+;r*V*ckp$o){f;Z3XJ z?Oi)Qopc%#4{5u0N`*g7{ee?QwMdv?v3Vt5$+HP0yT)*EJZFf9P z<0azKF1;(Q;$-Ar`79HA-|Zg8E`9NQ`I6yh-VB$U0_^dtC-&(b~sFTTYM`hkF zkW?J~Fs9+TQ*6igOcx(UB8wa&UxaQE?4M2l)sF`* zR7wlsa6Z*iW*29VKBvWnY1s8$e`rH-Y`g7o=zmU2tn)cbX-SbGTYhNv<)bkdp-#TN pZ@1~@@wn&v`P$7_TWz(~R$Fbg)mB?=wblNi_CIYdJ;nei008fJ7i$0j literal 1534 zcmV*i^c(9Hai{%Fal*n%OSC6i*TGh?f-wr{rTF+OoT0Fh}i7QYNgi%&V zbtOP(VXv6}`>9=BoyI8)j9rx>bn*V>ao>9WKZ@!G1>#ottW9z#bW9H`Y>*;LNwyV|2~&fuK~P`IHNTst<5=I^5bv=X#+!L42kE_( zM#{&>14Vm#eoeJ&r@K>Awf#-g_xChddd`=5kHa5^#QyMf=K1C4ZG&4-Ot@OwTG>iS z?VX#=R9m1&k|EY0YJf@#DFOKP4;^a{$9;ZuF6#>$^6s>!EX<#d16fCR$-;hMA865v zeY|VO=aWujrXg+DPML+%)E{WeE}q(*0@0f?J>Iunrk76bAv1a&@7iw1Xl~f=S=${? z(|E~od6z$Nrwt14&5NGchi>;URx3?@TfV+#_}Ook$_)co3sFm1SwQFXLwe{fT86R) zUxTTf^Z=IpcEL&P7=|$qFIll2Ka;!27=_+jt6s$pDO>norhl(dw$`5Vp+f0E+Rbbg z6<62_$3Rmm)B-APgzz|9wUpT<*kdSp31J>~eb*n_$c}BdJr4cPdC7IMlPf2+w9;`7 z!YXMve=_DG)hV?1?Ka;$9e1Ydwm@`ct#M9JqY%(%X>k)&KWEyvy-^JRSH_h;B*N&tDcrf7%>8wg83hou~rOEx^8wxf?TGyZe!o zcJt$@*|*~zO=_M>KmQ!M`)2h~TXHUIG+A8&p*2C{OCmEc@dFkfiC-2!Z-U_#E~ z4YpFsYia??Mk?7P8UQ)dV#;V?Qe%pWMrqV3n`E(w>|)eX7AHdv-WHjyaxON-pB3@3 z32<|-qfM=dB60MjM|vSB5iat>zw*NxeT||L0H}$c$%sBD>5+u31TP~bYXrCi-ZnNv zV-;$lB{rcn(l#PP0)*f(saWhor}Ly2BTbr>$yf8fG{_yo;zA|us~A!QRE@SGy8&j3 zDVGLOL?yI}0UEDMc8!QWm~0)U6pT|5npB+eDi!o#CHc_>Nlwi2nFl=s>K4g^P+4g$ za!}r#U1;P*g7PaDQV2XrGA(C`pmIU!Mwy7gMd>8{P7TnZvjG`p)VZPADvKyejv8Zh zF8k6z%AlfF>1!W;>1Pc!8_2CEYKsrap_eGaS6L{R>CRB@6U8D*ff$T}B*}3YrBFtt zKv$rgbB%|XV)WW4Lu(pnGL`R`a-edmqkiVYr>!V@gykC_Y(|8*QV| zIbovRq61MhImSPmEN$kHzT7o&?WnyMGg*qBryF0WvY$RMB2NulMbwbfIH~{LyUM%v zV3+j&kf$*B0M*Z9-@U;a-_U;n#y-=3Wwl)O|2^>ghOz7Uh)8ZY9ItD3{Uskh(w?US z8tr-GECq=q^`F;#o^bwi=(g=|>4