commit 0fbdccc61d32c64abfe56251b7b01bd05f2a4719
from: Oliver Lowe
date: Sun Jan 19 03:39:33 2025 UTC

use llm(1) instead of llama-cli

commit - 5a8cb9306ea5b15940ed7ef671f47d6b3f71c3b8
commit + 0fbdccc61d32c64abfe56251b7b01bd05f2a4719
blob - cb5bd5e7c666b52844ca8ad2e457245ddba8d897
blob + aaa322f78365fa1a52272d631ca5b71708b5d252
--- README
+++ README
@@ -9,7 +9,7 @@ The following commands are provided:
 - hlsget - download the contents of a HLS playlist
 - jsfmt - format javascript source code
 - lemmyverse - find lemmy communities
-- llama - prompt a large language model
+- llm - chat with a remote large language model
 - precis - summarise text
 - rfc - read IETF RFC documents
 - webpaste - create a web paste on webpaste.olowe.co
blob - 9fae067a5a3b05a4f91134370de26f2662166a6d (mode 755)
blob + /dev/null
--- bin/llama
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-
-import json
-import os
-import sys
-import urllib.request
-
-url = "https://api.groq.com/openai/v1/chat/completions"
-# small models include:
-# llama-3.1-8b-instant
-# llama-3.2-3b-preview
-# llama-3.2-1b-preview
-model = "llama-3.1-8b-instant"
-big = "llama-3.3-70b-versatile"
-
-def read_token(name):
-    with open(name) as f:
-        return f.read().strip()
-
-tpath = os.path.join(os.getenv("HOME"), ".config/groq/token")
-token = read_token(tpath)
-
-if len(sys.argv) > 1 and sys.argv[1] == "-b":
-    model = big
-prompt = sys.stdin.read()
-message = {"messages": [{"role": "user","content": prompt}], "model": model}
-
-req = urllib.request.Request(url, json.dumps(message).encode())
-req.add_header("Content-Type", "application/json")
-req.add_header("Authorization", "Bearer "+token)
-# groq blocks urllib's user agent
-req.add_header("User-Agent", "curl/8.9.0")
-
-with urllib.request.urlopen(req) as resp:
-    reply = json.load(resp)
-    print(reply["choices"][0]["message"]["content"])
blob - 3b99393fd0c2d9c7bd7e05a6c67c79a031526408
blob + 66401346223339e9dad1a3f72941a7f3b647d222
--- bin/precis
+++ bin/precis
@@ -1,24 +1,6 @@
 #!/bin/sh
-# https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_2
+sys='Summarise text provided by the user.
+Reply only with the summary text.'
 
-sys='<|start_header_id|>system<|end_header_id|>
-You summarise text provided by the user. Reply only with the summary text.<|eot_id|>
-<|start_header_id|>user<|end_header_id|>'
-
-tmp=`mktemp`
-echo "$sys" > $tmp
-cat >> $tmp
-echo '<|eot_id|>' >> $tmp
-echo -n '<|start_header_id|>assistant<|end_header_id|>' >> $tmp
-
-usage="usage: precis [model]"
-
-model=$HOME/llama-3.2-3b-instruct-q4_k_m.gguf
-if test $1
-then
-	model=$1
-fi
-
-llama-cli -m --no-display-prompt -c 8192 -f $tmp 2>/dev/null
-rm $tmp
+llm -s "$sys" $1
blob - 354b1af9e311030085c2c02107b8b5e0a80793e1 (mode 644)
blob + /dev/null
--- man/llama.1
+++ /dev/null
@@ -1,26 +0,0 @@
-.Dd
-.Dt LLAMA 1
-.Sh NAME
-.Nm llama
-.Nd prompt a large language model
-.Sh SYNOPSIS
-.Nm
-.Op Fl b
-.Sh DESCRIPTION
-.Nm
-reads a prompt from the standard input
-and sends it to a large language model hosted by Groq.
-The reply is written to the standard output.
-The default model is Llama 3.1 8B.
-.Pp
-A Groq API token must be written to
-.Pa $HOME/.config/groq/token .
-.Pp
-The following flags are understood:
-.Bl -tag -width Ds
-.It Fl b
-Prompt the "bigger" Llama 3.3 70B model.
-.Sh EXAMPLE
-.Dl echo 'What is LLM slop?' | llama
-.Sh EXIT STATUS
-.Ex
blob - /dev/null
blob + d7d2ff9e7f32aeeb5e468970e9afab5961e4defa (mode 644)
--- /dev/null
+++ man/llm.1
@@ -0,0 +1,63 @@
+.Dd
+.Dt LLAMA 1
+.Sh NAME
+.Nm llm
+.Nd chat with a remote large language model
+.Sh SYNOPSIS
+.Nm
+.Op Fl c
+.Op Fl m Ar model
+.Op Fl s Ar prompt
+.Op Fl u Ar url
+.Sh DESCRIPTION
+.Nm
+starts a chat with a large language model.
+The prompt is read from the standard input
+and the reply is written to the standard output.
+Any model available through
+the OpenAI-compatible chat completion HTTP API
+can be used.
+.Pp
+A back-and-forth chat may be started using the
+.Fl c
+flag.
+In this mode,
+a line consisting of just a literal dot character
+.Pq "."
+sends the prompt.
+Subsequent replies and prompts are included as context for the model's responses.
+.Pp
+An API key written to
+.Pa $HOME/.config/openai/key
+will be included with each request for authentication.
+.Pp
+The following flags are understood:
+.Bl -tag -width Ds
+.It Fl c
+Start a back-and-forth chat.
+.It Fl m Ar model
+Prompt
+.Ar model .
+The default is
+.Ar ministral-8b-latest .
+Note that
+.Xr llama-server 1
+from llama.cpp ignores this value.
+.It Fl s Ar prompt
+Set
+.Ar prompt
+as the system prompt.
+.It Fl u Ar url
+Connect to the OpenAI API root at
+.Ar url .
+The default is
+.Ar http://127.0.0.1:8080 .
+.Sh EXAMPLE
+.Pp
+Chat with a locally-hosted Mistral NeMo model:
+.Bd -literal -offset Ds
+llama-server -m models/Mistral-Nemo-Instruct-2407-Q6_K.gguf -c 16384 -fa &
+echo "Hello, world!" | llm
+.Ed
+.Sh EXIT STATUS
+.Ex
blob - 3ac41db48f0f1929f9d81422b75f1aad3fdd1ca5
blob + e8a7a9a9a952776782cf493f1ab6d6f7a59d82d0
--- man/precis.1
+++ man/precis.1
@@ -5,17 +5,16 @@
 .Nd summarise text
 .Sh SYNOPSIS
 .Nm
-.Op Ar model
+.Op Ar url
 .Sh DESCRIPTION
 .Nm
-reads text from the standard input
-and prints a short summary using a large language model.
-.Ar model
-is a path to a gguf model file.
+summarises text read from the standard input
+using a large language model.
+.Ar url
+is the base URL of an OpenAI-compatible HTTP API.
 The default is
-.Pa $HOME/llama-3.2-3b-instruct-q4_k_m.gguf .
+.Ar http://127.0.0.1:8080 .
 .Sh EXIT STATUS
 .Ex
 .Sh SEE ALSO
-.Xr llama-cli 1 ,
-.Lk https://github.com/ggerganov/llama.cpp llama.cpp
+.Xr llm 1
blob - 66acf4ecc4c76473fd24bbd22b6b548511520c4e
blob + 57f6608404c8ac3c332fdcb2bfb4f92a1792cb27
--- src/llm/llm.go
+++ src/llm/llm.go
@@ -16,8 +16,8 @@ import (
 )
 
 var model = flag.String("m", "ministral-8b-latest", "model")
-var baseURL = flag.String("u", "https://api.mistral.ai", "openai API base URL")
-var sysPrompt = flag.String("s", "You are a helpful assistant.", "system prompt")
+var baseURL = flag.String("u", "http://127.0.0.1:8080", "openai API base URL")
+var sysPrompt = flag.String("s", "", "system prompt")
 var converse = flag.Bool("c", false, "start a back-and-forth chat")
 
 func readToken() (string, error) {
@@ -61,11 +61,11 @@ func main() {
 	}
 	client := &openai.Client{http.DefaultClient, token, *baseURL}
-	chat := openai.Chat{
-		Messages: []openai.Message{
+	chat := openai.Chat{Model: *model}
+	if *sysPrompt != "" {
+		chat.Messages = []openai.Message{
 			{openai.RoleSystem, *sysPrompt},
-		},
-		Model: *model,
+		}
 	}
 	buf := &bytes.Buffer{}
 	if !*converse {
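
For reference, the sketch below is a standalone illustration (not the repository's src/llm/llm.go, which uses its own openai package) of the kind of request the new defaults imply: a POST to a local llama-server at http://127.0.0.1:8080, assuming the standard OpenAI-compatible /v1/chat/completions path, with the system message included only when a system prompt was set and a bearer key read from $HOME/.config/openai/key if present. The type names and the example prompt are illustrative only.

	// chatsketch.go: minimal OpenAI-compatible chat completion request.
	package main

	import (
		"bytes"
		"encoding/json"
		"fmt"
		"net/http"
		"os"
		"path/filepath"
		"strings"
	)

	type message struct {
		Role    string `json:"role"`
		Content string `json:"content"`
	}

	type chat struct {
		Model    string    `json:"model"`
		Messages []message `json:"messages"`
	}

	func main() {
		baseURL := "http://127.0.0.1:8080" // new default: a local server
		sysPrompt := ""                    // new default: no system prompt

		// As in the patched code, only send a system message when one was set.
		var msgs []message
		if sysPrompt != "" {
			msgs = append(msgs, message{"system", sysPrompt})
		}
		msgs = append(msgs, message{"user", "Hello, world!"})

		body, err := json.Marshal(chat{Model: "ministral-8b-latest", Messages: msgs})
		if err != nil {
			panic(err)
		}
		req, err := http.NewRequest("POST", baseURL+"/v1/chat/completions", bytes.NewReader(body))
		if err != nil {
			panic(err)
		}
		req.Header.Set("Content-Type", "application/json")

		// Per llm.1, an API key (if any) lives at $HOME/.config/openai/key.
		if b, err := os.ReadFile(filepath.Join(os.Getenv("HOME"), ".config/openai/key")); err == nil {
			req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(b)))
		}

		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()

		// Standard chat completion response shape: choices[0].message.content.
		var reply struct {
			Choices []struct {
				Message message `json:"message"`
			} `json:"choices"`
		}
		if err := json.NewDecoder(resp.Body).Decode(&reply); err != nil {
			panic(err)
		}
		if len(reply.Choices) > 0 {
			fmt.Println(reply.Choices[0].Message.Content)
		}
	}

Dropping the old "You are a helpful assistant." default means requests to a local llama-server carry no system message at all unless -s is given, which is what lets precis pass its summarisation instruction through llm -s.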