Remove no longer used sql directory
This commit is contained in:
parent
3440881086
commit
4d14e8928b
2 changed files with 0 additions and 142 deletions
106
sql/crawler.sql
106
sql/crawler.sql
|
@ -1,106 +0,0 @@
|
|||
|
||||
-- -- return a random sample of repos, containing %percent repositories
|
||||
-- create or replace function repos_random_sample_array(percent real)
|
||||
-- returns setof repos as $$
|
||||
-- declare
|
||||
-- samples integer;
|
||||
-- repo repos%rowtype;
|
||||
-- ids integer[];
|
||||
-- begin
|
||||
-- select floor(count(*) / 100 * percent) into samples from repos;
|
||||
-- ids := array(select id from repos order by id);
|
||||
-- for i in 1 .. samples loop
|
||||
-- select * into repo
|
||||
-- from repos
|
||||
-- where id = ids[round(random() * samples)];
|
||||
-- return next repo;
|
||||
-- end loop;
|
||||
-- return;
|
||||
-- end
|
||||
-- $$
|
||||
-- language plpgsql;
|
||||
|
||||
-- Return a random sample of repositories.
--
-- Parameters:
--   percent  size of the sample, as a percentage of the number of rows
--            currently in repos (e.g. 10 for 10 %).
-- Returns:
--   floor(count(repos) * percent / 100) rows of repos, in random order.
create or replace function repos_random_sample(percent real)
returns setof repos as $$
declare
    sample_size integer;
begin
    -- Multiply before dividing and divide by a non-integer constant.
    -- count(*) is bigint, so "count(*) / 100" is integer division: it
    -- truncates *before* percent is applied (always 0 for tables with fewer
    -- than 100 rows, and e.g. 50 instead of 75 for 150 rows at 50 %).
    select floor(count(*) * percent / 100.0)
      into sample_size
      from repos;

    return query
        select *
        from repos
        order by random()
        limit sample_size;

    return;
end
$$
language plpgsql;
|
||||
|
||||
-- -- return a random sample of repositories
|
||||
-- create or replace function random_sample_sequence(percent real)
|
||||
-- returns setof repos as $$
|
||||
-- declare
|
||||
-- sample_size integer;
|
||||
-- seq_size integer;
|
||||
-- min_id integer;
|
||||
-- max_id integer;
|
||||
-- begin
|
||||
-- select floor(count(*) / 100 * percent) into sample_size from repos;
|
||||
-- select min(id) into min_id from repos;
|
||||
-- select max(id) into max_id from repos;
|
||||
-- seq_size := sample_size * 3; -- IDs are sparse, generate a larger sequence
|
||||
-- -- to have enough of them
|
||||
-- return query
|
||||
-- select * from repos
|
||||
-- where id in
|
||||
-- (select floor(random() * (max_id - min_id + 1))::integer
|
||||
-- + min_id
|
||||
-- from generate_series(1, seq_size))
|
||||
-- order by random() limit sample_size;
|
||||
-- return;
|
||||
-- end
|
||||
-- $$
|
||||
-- language plpgsql;
|
||||
|
||||
-- Return every row of repos whose full_name belongs to one of the
-- hard-coded organisations (apache, eclipse, mozilla) or is exactly one of
-- the hard-coded repositories (torvalds/linux, gcc-mirror/gcc).
create or replace function repos_well_known()
returns setof repos as $$
begin
    return query
        select *
        from repos
        where
            -- whole organisations, matched by "<owner>/" prefix
            full_name like any (array['apache/%', 'eclipse/%', 'mozilla/%'])
            -- individual repositories, matched exactly
            or full_name in ('torvalds/linux', 'gcc-mirror/gcc');

    return;
end
$$
language plpgsql;
|
||||
|
||||
-- History of crawler runs.
-- NOTE(review): presumably one row per crawl of a repository; confirm
-- against the code that writes this table.
create table crawl_history (
    id       bigserial,
    repo     integer,                   -- crawled repository (repos.id)
    task_id  uuid,                      -- celery task id
    date     timestamptz not null,
    duration interval,
    status   boolean,                   -- NOTE(review): presumably true = success; confirm
    result   json,
    stdout   text,
    stderr   text,
    primary key (id),
    foreign key (repo) references repos (id)
);

-- Supports lookups of the history rows of a given repository, e.g. the
-- "where history.repo = ..." probes of the missing_*_repos views.
create index on crawl_history using btree (repo);
|
||||
|
||||
-- Rows of orig_repos that have no entry at all in crawl_history.
create view missing_orig_repos as
    select o.*
    from orig_repos as o
    where not exists (
        select 1
        from crawl_history as h
        where h.repo = o.id
    );
|
||||
|
||||
-- Rows of fork_repos that have no entry at all in crawl_history.
create view missing_fork_repos as
    select f.*
    from fork_repos as f
    where not exists (
        select 1
        from crawl_history as h
        where h.repo = f.id
    );
|
|
@ -1,36 +0,0 @@
|
|||
|
||||
-- Repositories whose fork flag is false, restricted to the descriptive
-- columns listed below.
create view orig_repos as
    select
        r.id,
        r.name,
        r.full_name,
        r.html_url,
        r.description,
        r.last_seen
    from repos as r
    where not r.fork;
|
||||
|
||||
-- Repositories whose fork flag is true, restricted to the same columns as
-- orig_repos.
create view fork_repos as
    select
        id,
        name,
        full_name,
        html_url,
        description,
        last_seen
    from repos
    where fork;  -- terminator was missing; the statement ran into the next one
|
||||
|
||||
-- Trigram support, needed for the gin_trgm_ops operator class used below.
-- "if not exists" keeps this from failing when pg_trgm is already
-- installed in the database.
create extension if not exists pg_trgm;

-- Trigram GIN index on repos.description.
-- NOTE(review): presumably there to speed up LIKE/ILIKE or similarity
-- searches on the description; confirm against the queries that use it.
create index ix_trgm_repos_description
    on repos using gin (description gin_trgm_ops);

-- Trigram GIN index on repos.full_name.
create index ix_trgm_repos_full_name
    on repos using gin (full_name gin_trgm_ops);
|
||||
|
||||
-- Snapshots of repository counts over time; consumed by the
-- repo_creations view, which subtracts consecutive snapshots.
-- NOTE(review): presumably repos = fork_repos + orig_repos; nothing here
-- enforces that, so confirm against the code that inserts the snapshots.
create table repos_history (
    ts         timestamp without time zone default current_timestamp,  -- time of the snapshot
    repos      int not null,
    fork_repos int,
    orig_repos int
);
|
||||
|
||||
-- Change in each repository count between a snapshot in repos_history and
-- the snapshot immediately before it (the one with the greatest earlier ts).
-- The earliest snapshot has no predecessor and therefore produces no row.
-- NOTE(review): the result is labelled with the date of the later snapshot;
-- this presumably assumes one snapshot per day. Confirm with the job that
-- fills repos_history.
create view repo_creations as
    select
        cast(cur.ts as date)              as date,
        cur.repos - prev.repos            as repos,
        cur.fork_repos - prev.fork_repos  as fork_repos,
        cur.orig_repos - prev.orig_repos  as orig_repos
    from repos_history as cur
    inner join repos_history as prev
        on prev.ts = (
            -- timestamp of the latest snapshot strictly older than cur
            select max(older.ts)
            from repos_history as older
            where older.ts < cur.ts
        );
|
Loading…
Add table
Add a link
Reference in a new issue