commit 4fe62619eb0056f51a56c8117074b04245c09e74 from: Oliver Lowe date: Tue Jul 18 08:18:07 2023 UTC lemmyverse: generate our database on the fly to simplify cache Our awk program can go through the database really quickly now that we trim the fat of community.full.json before we store info from it. commit - 2f11327c092f6d29cf4651b0d75082fe9bb413ba commit + 4fe62619eb0056f51a56c8117074b04245c09e74 blob - ee89a78a7adbd229a6fe2e3ab8e48128803542fc blob + f5561cf5a19de533c3990fc9e30348b0beaca5cd --- bin/lemmyverse +++ bin/lemmyverse @@ -8,24 +8,27 @@ then fi cachedir=$HOME/.cache/lemmyverse +if test -n "$XDG_CACHE_HOME" +then + cachedir=$XDG_CACHE_HOME/lemmyverse +fi mkdir -p $cachedir + +# thousands of spam/bot communities at lemmy.world? +badnames='enoweiooe' + cd $cachedir -if ! test -f community.full.json +if ! test -f communities then - curl --compressed https://lemmyverse.net/data/community.full.json > community.full.json + curl --compressed https://lemmyverse.net/data/community.full.json | \ + tr ',' '\n' | \ + grep -v $badnames | \ + tr -d '"{}[]' | \ + grep -E '(^baseurl)|(^name)|(^desc)' > communities fi -if test -f communities -then - grep $@ $cachedir/communities - exit $? -fi - -< community.full.json tr ',' ' -' | tr -d '"{}' | grep '(^baseurl)|(^name)|(^desc)' | awk -F : ' +awk -F : ' $1 == "baseurl" { instance=$2 } $1 == "desc"{ desc = $2 } $1 == "name" { name = $2 } -NR % 3 == 0 { printf "%s@%s\t%s\n", name, instance, desc }' | grep -v 'enoweiooe' | sort > communities - -grep $@ $cachedir/communities +NR % 3 == 0 { printf "%s@%s\t%s\n", name, instance, desc }' communities | sort | grep "$@" blob - 488076dcd84b76f22d358dc9c29c0e03f48bcfc2 blob + 4a3c1ca8a2214c914a447e56a83c31caa299c527 --- man/lemmyverse.1 +++ man/lemmyverse.1 @@ -1,6 +1,6 @@ .Dd .Dt LEMMYVERSE 1 -.Sh Name +.Sh NAME .Nm lemmyverse .Nd search lemmy communities .Sh SYNOPSIS @@ -10,25 +10,45 @@ .Nm searches Lemmy communities indexed by .Pa lemmyverse.net -using the given regular expression. 
+using the given regular expression as interpreted by +.Xr grep 1 . Both the names and descriptions of the communities are searched. .Pp -On first run, a search index file of communities must be generated. -The full community database is downloaded from +On first run, a local community database must be generated. +The full community index is downloaded from .Pa https://lemmyverse.net -and stored in the user cache directory. +using +.Xr curl 1 , +transformed, +then stored in the user cache directory. +To regenerate the database, +remove the file and run .Nm -then generates the index. -To fetch a new database or generate a new index, -remove each file respectively. -.Sh EXAMPLE +again. +.Sh FILES +.Bl -tag -width Ds +.It Pa communities +Community database from lemmyverse.net. +.El +.Sh ENVIRONMENT +.Nm +uses the following environment variables: +.Bl -tag -width Ds +.It Ev XDG_CACHE_HOME +The directory to store the community database and index. +If unset, +.Pa $HOME/.cache/lemmyverse +is used. +.El +.Sh EXAMPLES Find communities for the Plan 9 operating system: .Dl lemmyverse '(plan9)|(Plan 9)' .Pp List all communities from the instance lemmy.sdf.org: -.Dl lemmyverse '.*@lemmy.sdf.org' +.Dl lemmyverse '@lemmy.sdf.org' .Sh EXIT STATUS .Ex .Sh SEE ALSO +.Xr grep 1 , .Xr curl 1 , .Lk https://lemmyverse.net