114 lines
3.2 KiB
PL/PgSQL
114 lines
3.2 KiB
PL/PgSQL
|
|
CREATE VIEW orig_repos AS
|
|
SELECT id, name, full_name, html_url, description, last_seen
|
|
FROM repos
|
|
WHERE NOT fork;
|
|
|
|
CREATE VIEW fork_repos AS
|
|
SELECT id, name, full_name, html_url, description, last_seen
|
|
FROM repos
|
|
WHERE fork
|
|
|
|
CREATE EXTENSION pg_trgm;
|
|
|
|
CREATE INDEX ix_trgm_repos_description ON
|
|
repos USING gin (description gin_trgm_ops);
|
|
|
|
CREATE INDEX ix_trgm_repos_full_name ON
|
|
repos USING gin (full_name gin_trgm_ops);
|
|
|
|
CREATE TABLE repos_history (
|
|
ts timestamp DEFAULT current_timestamp,
|
|
repos integer NOT NULL,
|
|
fork_repos integer,
|
|
orig_repos integer
|
|
);
|
|
|
|
CREATE VIEW repo_creations AS
|
|
SELECT today.ts :: date as date,
|
|
today.repos - yesterday.repos as repos,
|
|
today.fork_repos - yesterday.fork_repos as fork_repos,
|
|
today.orig_repos - yesterday.orig_repos as orig_repos
|
|
FROM repos_history today
|
|
JOIN repos_history yesterday ON
|
|
(yesterday.ts = (SELECT max(ts)
|
|
FROM repos_history
|
|
WHERE ts < today.ts));
|
|
|
|
-- -- return a random sample of repos, containing %percent repositories
|
|
-- create or replace function repos_random_sample_array(percent real)
|
|
-- returns setof repos as $$
|
|
-- declare
|
|
-- samples integer;
|
|
-- repo repos%rowtype;
|
|
-- ids integer[];
|
|
-- begin
|
|
-- select floor(count(*) / 100 * percent) into samples from repos;
|
|
-- ids := array(select id from repos order by id);
|
|
-- for i in 1 .. samples loop
|
|
-- select * into repo
|
|
-- from repos
|
|
-- where id = ids[round(random() * samples)];
|
|
-- return next repo;
|
|
-- end loop;
|
|
-- return;
|
|
-- end
|
|
-- $$
|
|
-- language plpgsql;
|
|
|
|
-- return a random sample of repositories
|
|
create or replace function repos_random_sample(percent real)
|
|
returns setof repos as $$
|
|
declare
|
|
sample_size integer;
|
|
begin
|
|
select floor(count(*) / 100 * percent) into sample_size from repos;
|
|
return query
|
|
select * from repos
|
|
order by random()
|
|
limit sample_size;
|
|
return;
|
|
end
|
|
$$
|
|
language plpgsql;
|
|
|
|
-- -- return a random sample of repositories
|
|
-- create or replace function random_sample_sequence(percent real)
|
|
-- returns setof repos as $$
|
|
-- declare
|
|
-- sample_size integer;
|
|
-- seq_size integer;
|
|
-- min_id integer;
|
|
-- max_id integer;
|
|
-- begin
|
|
-- select floor(count(*) / 100 * percent) into sample_size from repos;
|
|
-- select min(id) into min_id from repos;
|
|
-- select max(id) into max_id from repos;
|
|
-- seq_size := sample_size * 3; -- IDs are sparse, generate a larger sequence
|
|
-- -- to have enough of them
|
|
-- return query
|
|
-- select * from repos
|
|
-- where id in
|
|
-- (select floor(random() * (max_id - min_id + 1))::integer
|
|
-- + min_id
|
|
-- from generate_series(1, seq_size))
|
|
-- order by random() limit sample_size;
|
|
-- return;
|
|
-- end
|
|
-- $$
|
|
-- language plpgsql;
|
|
|
|
create or replace function repos_well_known()
|
|
returns setof repos as $$
|
|
begin
|
|
return query
|
|
select * from repos
|
|
where full_name like 'apache/%'
|
|
or full_name like 'eclipse/%'
|
|
or full_name like 'mozilla/%'
|
|
or full_name = 'torvalds/linux'
|
|
or full_name = 'gcc-mirror/gcc';
|
|
return;
|
|
end
|
|
$$
|
|
language plpgsql;
|