-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
69 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Scheduled workflow: runs ./download-wiki.sh once an hour and pushes any
# commits that script created.
name: Sync from Reddit

on:
  schedule:
    # run hourly at some arbitrary minute
    - cron: '3 * * * *'

# The final step runs `git push`, which needs write access to the
# repository contents; declare it explicitly instead of relying on the
# repository's default token permissions.
permissions:
  contents: write

jobs:
  sync-from-reddit:
    name: Sync from Reddit
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v3
      # download-wiki.sh creates commits, so an author identity must be set.
      - name: Configure git user
        run: |
          git config user.name 'github-actions[bot]'
          git config user.email 'github-actions[bot]@users.noreply.github.com'
      - name: Download wiki from Reddit
        run: ./download-wiki.sh
      - name: Push changes (if any)
        run: git push
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,9 @@ | |
.docusaurus | ||
.cache-loader | ||
|
||
# Download folder for sync | ||
/download | ||
|
||
# Misc | ||
.DS_Store | ||
.idea | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/bin/bash
#
# Downloads every wiki page of a subreddit as JSON, converts each page to a
# Markdown file in this repository, and creates one git commit per changed
# page. Pages whose path starts with "config" are archived under
# $NONDOCS_DIR instead of being published under $DOCS_DIR.
#
# Requires: bash coreutils curl jq
# Prerequisite: git user is configured via git config
#
# Adapted from https://www.reddit.com/r/DataHoarder/comments/ga2p8y/comment/idpu8cs/

set -u

USER_AGENT='wikidownload/1.0'
DOWNLOAD_DIR="download"
DOCS_DIR="docs"
NONDOCS_DIR="archive"
SUBREDDIT="JapanFinance"
WIKI_URL="https://www.reddit.com/r/$SUBREDDIT/wiki"

while IFS= read -r PAGE; do
  # Reddit's anonymous access rate limit is 10 requests per minute
  # see https://www.reddit.com/r/redditdev/comments/14nbw6g/updated_rate_limits_going_into_effect_over_the/
  sleep 6.1

  # strip index/ from beginning of page path
  SOURCE_PAGE_JSON="./$DOWNLOAD_DIR/${PAGE#*index/}.json"
  TARGET_PAGE_MD="./$DOCS_DIR/${PAGE#*index/}.md"
  # do not publish config markdown, but archive it in this same repo
  if [[ $PAGE == config* ]]; then
    TARGET_PAGE_MD="./$NONDOCS_DIR/$PAGE.md"
  fi

  # strip file name from end of path when making directories
  mkdir -p "${SOURCE_PAGE_JSON%/*}" "${TARGET_PAGE_MD%/*}"

  curl -s --user-agent "$USER_AGENT" "$WIKI_URL/$PAGE.json" > "$SOURCE_PAGE_JSON"
  # Capture curl's status immediately; any later command would overwrite $?.
  CURL_STATUS=$?
  printf '%s %s\n' "$SUBREDDIT/wiki/$PAGE" "$CURL_STATUS"

  # Skip the page when the transfer failed or the response carries no
  # Markdown string (error payload, invalid JSON). Otherwise the existing
  # Markdown file would be overwritten with "null" or an empty file.
  if (( CURL_STATUS != 0 )) \
    || ! jq -e '.data.content_md | type == "string"' "$SOURCE_PAGE_JSON" > /dev/null; then
    printf 'warning: skipping %s: download failed or response has no content_md\n' \
      "$PAGE" >&2
    continue
  fi

  # "// empty" yields an empty string instead of the text "null" when the
  # field is absent.
  REASON="$(jq -r '.data.reason // empty' "$SOURCE_PAGE_JSON")"
  AUTHOR="$(jq -r '.data.revision_by.data.name // empty' "$SOURCE_PAGE_JSON")"

  # Rewrite wiki links before saving Markdown file
  jq -r '.data.content_md' "$SOURCE_PAGE_JSON" \
    | sed "s,https://www\.reddit\.com/r/$SUBREDDIT/wiki/index/,,g" \
      > "$TARGET_PAGE_MD"

  # If the wiki page was changed, commit it. Only this page's file is
  # inspected, so unrelated changes elsewhere in the working tree do not
  # trigger a commit attempt.
  if [[ -n "$(git status --porcelain -- "$TARGET_PAGE_MD")" ]]; then
    git add -- "$TARGET_PAGE_MD"
    # Log the change while it is still staged; after the commit a plain
    # "git diff" would no longer show it.
    git --no-pager diff --cached -- "$TARGET_PAGE_MD"

    # Only add message paragraphs for details the page actually provides.
    COMMIT_ARGS=(-m "Sync from Reddit")
    if [[ -n $REASON ]]; then
      COMMIT_ARGS+=(-m "$REASON")
    fi
    if [[ -n $AUTHOR ]]; then
      COMMIT_ARGS+=(-m "Change made by u/$AUTHOR")
    fi
    git commit "${COMMIT_ARGS[@]}"
  fi

done < <(curl -S -s --user-agent "$USER_AGENT" "$WIKI_URL/pages.json" | jq -r '.data | .[]')

# ":?" aborts instead of expanding to "./" if the variable is ever empty.
rm -rf -- "./${DOWNLOAD_DIR:?}"