Skip to content

Commit

Permalink
Add yearly cron (#268)
Browse files Browse the repository at this point in the history
* Adding yearly table generation

* Removing unused comment

Co-authored-by: Matt Gaunt <[email protected]>
  • Loading branch information
gauntface and Matt Gaunt authored Dec 20, 2022
1 parent 83006ac commit e312036
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cron/tasks.cron
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@
# [Daily at 00:10] append daily results to monthly rollup
# Runs `bq query --append_table` in a login shell (bash -l) to copy every row
# of yesterday's [githubarchive:day.YYYYMMDD] table into month.YYYYMM; both
# table names are built with `date -d"1 day ago"`. The percent signs in the
# date formats are backslash-escaped because cron would otherwise treat them
# as newlines. stdout and stderr are appended to append.log.
10 0 * * * /bin/bash -l -c 'bq query --append_table --allow_large_results --noflatten_results --destination_table=month.`date +"\%Y\%m" -d"1 day ago"` "SELECT * FROM [githubarchive:day.`date +"\%Y\%m\%d" -d"1 day ago"`]"' >> /home/igrigorik/githubarchive.org/crawler/data/append.log 2>&1

# [Yearly at 5am on 1st January] Generate the yearly tables
# Invokes gen_yearly.sh through `sh` with two arguments: last year
# (`date -d"1 year ago"`) as the start year and the current year
# (`date -d"today"`) as the end year. Output goes to the same append.log
# used by the daily rollup above.
0 5 1 1 * /bin/bash -l -c 'sh /home/igrigorik/githubarchive.org/scripts/gen_yearly.sh `date +"\%Y" -d"1 year ago"` `date +"\%Y" -d"today"`' >> /home/igrigorik/githubarchive.org/crawler/data/append.log 2>&1

# [Daily at 00:00] keep last 365 days worth of data
# Deletes every path matching *json* in the crawler data directory whose
# modification time is more than 365 days old (-mtime +365), one `rm` per file.
0 0 * * * find /home/igrigorik/githubarchive.org/crawler/data/*json* -mtime +365 -exec rm {} \;
21 changes: 21 additions & 0 deletions scripts/gen_yearly.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
#
# Rebuild the BigQuery yearly rollup tables (year.YYYY) from the daily tables.
#
# Usage: gen_yearly.sh START_YEAR END_YEAR
#
#   START_YEAR  first year to generate, inclusive (four digits, e.g. 2021)
#   END_YEAR    year to stop at, exclusive (four digits, e.g. 2022)
#
# One `bq query --replace` is issued per year in [START_YEAR, END_YEAR), so
# re-running a range overwrites the existing year.YYYY tables.
#
# Exit status: 0 when every query succeeded (or the range is empty),
#              1 when at least one bq query failed,
#              2 on invalid arguments.
#
# Only constructs that dash also accepts are used (no [[ ]], no arrays) so the
# script keeps working when it is launched as `sh gen_yearly.sh ...`.

# Succeed when $1 looks like a four-digit calendar year.
# A leading zero is rejected so the value is never read as octal in $(( )).
is_year() {
  case "$1" in
    [1-9][0-9][0-9][0-9]) return 0 ;;
    *) return 1 ;;
  esac
}

# Generate year.YYYY for every year in [$1, $2).
# Arguments: $1 - start year (inclusive), $2 - end year (exclusive)
# Outputs:   progress lines and bq output on stdout, diagnostics on stderr
# Returns:   0 on success, 1 if any bq query failed, 2 on bad arguments
gen_yearly() {
  local year end_year status query

  if [ "$#" -lt 2 ] || ! is_year "$1" || ! is_year "$2"; then
    printf 'Usage: %s START_YEAR END_YEAR (four-digit years)\n' "${0##*/}" >&2
    return 2
  fi
  year=$1
  end_year=$2

  # A reversed range can never reach its end; refuse it instead of looping.
  if [ "$year" -gt "$end_year" ]; then
    printf '%s: start year %s is after end year %s\n' \
      "${0##*/}" "$year" "$end_year" >&2
    return 2
  fi

  status=0
  while [ "$year" -lt "$end_year" ]; do
    printf 'Processing %s-01-01\n' "$year"
    printf '\t %s <- %s-01-01 - %s-12-31\n' "$year" "$year" "$year"

    # TABLE_DATE_RANGE includes the table for the end timestamp's day, so the
    # range has to end on 31 December; ending on 1 January of the next year
    # would pull that day's table into this year's rollup.
    query="SELECT * FROM (TABLE_DATE_RANGE([day.], TIMESTAMP('$year-01-01'), TIMESTAMP('$year-12-31')))"

    # Keep going after a failure so the remaining years are still generated,
    # but remember it so the caller (and the cron log) sees a non-zero status.
    if ! bq query --allow_large_results --noflatten_results --replace \
      "--destination_table=year.$year" "$query"; then
      printf '%s: bq query failed for year %s\n' "${0##*/}" "$year" >&2
      status=1
    fi

    year=$((year + 1))
  done

  return "$status"
}

# Must stay the last command: its return value is the script's exit status.
gen_yearly "$@"

0 comments on commit e312036

Please sign in to comment.