aboutsummaryrefslogtreecommitdiffstats
path: root/scrape_hexpm.sh
diff options
context:
space:
mode:
Diffstat (limited to 'scrape_hexpm.sh')
-rwxr-xr-xscrape_hexpm.sh48
1 files changed, 48 insertions, 0 deletions
diff --git a/scrape_hexpm.sh b/scrape_hexpm.sh
new file mode 100755
index 0000000..a4a59f7
--- /dev/null
+++ b/scrape_hexpm.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/sh
+
+# This script will scrape packages from hex.pm that are believed to be
+# Erlang packages. We do this by first walking through all packages and
+# then getting the most recent release of each package. If the build tools
+# listed contain "rebar3" or "make" we keep them and write the package
+# name and version to a file.
+#
+# This script should only be run occasionally to refresh the file
+# containing the list of packages.
+#
+# @todo Some of the projects fetched are Elixir despite indicating
+# "rebar3" or "make". We should ignore them here once identified
+# so they don't make it to the output.
+# @todo Probably better to only check "rebar3" since many Elixir
+# projects include "make".
+
+NUM=1
+
+while true; do
+
+ echo "# Packages page $NUM"
+
+ PAGE=$(curl -s "https://hex.pm/api/packages?sort=name&page=$NUM")
+
+ if [ "$PAGE" = "[]" ]; then exit 0; fi
+
+ PACKAGES=$(echo $PAGE | jq -r "map({name: .name, url: .releases[0].url})")
+
+ echo $PACKAGES | jq -r '.[] | [.name, .url] | join(" ")' | while read -r NAMEURL; do
+
+ NAME=$(echo $NAMEURL | awk '{print $1;}')
+ URL=$(echo $NAMEURL | awk '{print $2;}')
+
+ VERSION=$(curl -s "$URL" | jq 'select(.meta.build_tools | index("rebar3") or index("make")) | .version | tostring')
+ VERSION=$(echo $VERSION | tr -d '"')
+
+ if [ -n "$VERSION" ]; then
+ echo "$NAME $VERSION"
+ fi
+
+ done
+
+ NUM=$(expr $NUM + 1)
+
+ sleep 10
+
+done