commit 0fbdccc61d32c64abfe56251b7b01bd05f2a4719
from: Oliver Lowe
date: Sun Jan 19 03:39:33 2025 UTC

use llm(1) instead of llama-cli

commit - 5a8cb9306ea5b15940ed7ef671f47d6b3f71c3b8
commit + 0fbdccc61d32c64abfe56251b7b01bd05f2a4719
blob - cb5bd5e7c666b52844ca8ad2e457245ddba8d897
blob + aaa322f78365fa1a52272d631ca5b71708b5d252
--- README
+++ README
@@ -9,7 +9,7 @@ The following commands are provided:
 - hlsget - download the contents of a HLS playlist
 - jsfmt - format javascript source code
 - lemmyverse - find lemmy communities
-- llama - prompt a large language model
+- llm - chat with a remote large language model
 - precis - summarise text
 - rfc - read IETF RFC documents
 - webpaste - create a web paste on webpaste.olowe.co
blob - 9fae067a5a3b05a4f91134370de26f2662166a6d (mode 755)
blob + /dev/null
--- bin/llama
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-
-import json
-import os
-import sys
-import urllib.request
-
-url = "https://api.groq.com/openai/v1/chat/completions"
-# small models include:
-# llama-3.1-8b-instant
-# llama-3.2-3b-preview
-# llama-3.2-1b-preview
-model = "llama-3.1-8b-instant"
-big = "llama-3.3-70b-versatile"
-
-def read_token(name):
-    with open(name) as f:
-        return f.read().strip()
-
-tpath = os.path.join(os.getenv("HOME"), ".config/groq/token")
-token = read_token(tpath)
-
-if len(sys.argv) > 1 and sys.argv[1] == "-b":
-    model = big
-prompt = sys.stdin.read()
-message = {"messages": [{"role": "user","content": prompt}], "model": model}
-
-req = urllib.request.Request(url, json.dumps(message).encode())
-req.add_header("Content-Type", "application/json")
-req.add_header("Authorization", "Bearer "+token)
-# groq blocks urllib's user agent
-req.add_header("User-Agent", "curl/8.9.0")
-
-with urllib.request.urlopen(req) as resp:
-    reply = json.load(resp)
-    print(reply["choices"][0]["message"]["content"])
blob - 3b99393fd0c2d9c7bd7e05a6c67c79a031526408
blob + 66401346223339e9dad1a3f72941a7f3b647d222
--- bin/precis
+++ bin/precis
@@ -1,24 +1,6 @@
 #!/bin/sh
-# https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_2
+sys='Summarise text provided by the user.
+Reply only with the summary text.'
 
-sys='<|start_header_id|>system<|end_header_id|>
-You summarise text provided by the user. Reply only with the summary text.<|eot_id|>
-<|start_header_id|>user<|end_header_id|>'
-
-tmp=`mktemp`
-echo "$sys" > $tmp
-cat >> $tmp
-echo '<|eot_id|>' >> $tmp
-echo -n '<|start_header_id|>assistant<|end_header_id|>' >> $tmp
-
-usage="usage: precis [model]"
-
-model=$HOME/llama-3.2-3b-instruct-q4_k_m.gguf
-if test $1
-then
-	model=$1
-fi
-
-llama-cli -m --no-display-prompt -c 8192 -f $tmp 2>/dev/null
-rm $tmp
+llm -s "$sys" $1
blob - 354b1af9e311030085c2c02107b8b5e0a80793e1 (mode 644)
blob + /dev/null
--- man/llama.1
+++ /dev/null
@@ -1,26 +0,0 @@
-.Dd
-.Dt LLAMA 1
-.Sh NAME
-.Nm llama
-.Nd prompt a large language model
-.Sh SYNOPSIS
-.Nm
-.Op Fl b
-.Sh DESCRIPTION
-.Nm
-reads a prompt from the standard input
-and sends it to a large language model hosted by Groq.
-The reply is written to the standard output.
-The default model is Llama 3.1 8B.
-.Pp
-A Groq API token must be written to
-.Pa $HOME/.config/groq/token .
-.Pp
-The following flags are understood:
-.Bl -tag -width Ds
-.It Fl b
-Prompt the "bigger" Llama 3.3 70B model.
-.Sh EXAMPLE
-.Dl echo 'What is LLM slop?' | llama
-.Sh EXIT STATUS
-.Ex
blob - /dev/null
blob + d7d2ff9e7f32aeeb5e468970e9afab5961e4defa (mode 644)
--- /dev/null
+++ man/llm.1
@@ -0,0 +1,63 @@
+.Dd
+.Dt LLAMA 1
+.Sh NAME
+.Nm llm
+.Nd chat with a remote large language model
+.Sh SYNOPSIS
+.Nm
+.Op Fl c
+.Op Fl m Ar model
+.Op Fl s Ar prompt
+.Op Fl u Ar url
+.Sh DESCRIPTION
+.Nm
+starts a chat with a large language model.
+The prompt is read from the standard input
+and the reply is written to the standard output.
+Any model available through
+the OpenAI-compatible chat completion HTTP API
+can be used.
+.Pp
+A back-and-forth chat may be started using the
+.Fl c
+flag.
+In this mode,
+a line consisting of just a literal dot character
+.Pq "."
+sends the prompt.
+Subsequent replies and prompts are included as context for the model's responses.
+.Pp
+An API key written to
+.Pa $HOME/.config/openai/key
+will be included with each request for authentication.
+.Pp
+The following flags are understood:
+.Bl -tag -width Ds
+.It Fl c
+Start a back-and-forth chat.
+.It Fl m Ar model
+Prompt
+.Ar model .
+The default is
+.Ar ministral-8b-latest .
+Note that
+.Xr llama-server 1
+from llama.cpp ignores this value.
+.It Fl s Ar prompt
+Set
+.Ar prompt
+as the system prompt.
+.It Fl u Ar url
+Connect to the OpenAI API root at
+.Ar url .
+The default is
+.Ar http://127.0.0.1:8080 .
+.Sh EXAMPLE
+.Pp
+Chat with a locally-hosted Mistral NeMo model:
+.Bd -literal -offset Ds
+llama-server -m models/Mistral-Nemo-Instruct-2407-Q6_K.gguf -c 16384 -fa &
+echo "Hello, world!" | llm
+.Ed
+.Sh EXIT STATUS
+.Ex
blob - 3ac41db48f0f1929f9d81422b75f1aad3fdd1ca5
blob + e8a7a9a9a952776782cf493f1ab6d6f7a59d82d0
--- man/precis.1
+++ man/precis.1
@@ -5,17 +5,16 @@
 .Nd summarise text
 .Sh SYNOPSIS
 .Nm
-.Op Ar model
+.Op Ar url
 .Sh DESCRIPTION
 .Nm
-reads text from the standard input
-and prints a short summary using a large language model.
-.Ar model
-is a path to a gguf model file.
+summarises text read from the standard input
+using a large language model.
+.Ar url
+is the base URL of an OpenAI-compatible HTTP API.
 The default is
-.Pa $HOME/llama-3.2-3b-instruct-q4_k_m.gguf .
+.Ar http://127.0.0.1:8080 .
 .Sh EXIT STATUS
 .Ex
 .Sh SEE ALSO
-.Xr llama-cli 1 ,
-.Lk https://github.com/ggerganov/llama.cpp llama.cpp
+.Xr llm 1
blob - 66acf4ecc4c76473fd24bbd22b6b548511520c4e
blob + 57f6608404c8ac3c332fdcb2bfb4f92a1792cb27
--- src/llm/llm.go
+++ src/llm/llm.go
@@ -16,8 +16,8 @@ import (
 )
 
 var model = flag.String("m", "ministral-8b-latest", "model")
-var baseURL = flag.String("u", "https://api.mistral.ai", "openai API base URL")
-var sysPrompt = flag.String("s", "You are a helpful assistant.", "system prompt")
+var baseURL = flag.String("u", "http://127.0.0.1:8080", "openai API base URL")
+var sysPrompt = flag.String("s", "", "system prompt")
 var converse = flag.Bool("c", false, "start a back-and-forth chat")
 
 func readToken() (string, error) {
@@ -61,11 +61,11 @@ func main() {
 	}
 	client := &openai.Client{http.DefaultClient, token, *baseURL}
-	chat := openai.Chat{
-		Messages: []openai.Message{
+	chat := openai.Chat{Model: *model}
+	if *sysPrompt != "" {
+		chat.Messages = []openai.Message{
 			{openai.RoleSystem, *sysPrompt},
-		},
-		Model: *model,
+		}
 	}
 	buf := &bytes.Buffer{}
 	if !*converse {
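
For reference, the sketch below is a standalone illustration (not the repository's src/llm/llm.go, which uses its own openai package) of the kind of request the new defaults imply: a POST to a local llama-server at http://127.0.0.1:8080, assuming the standard OpenAI-compatible /v1/chat/completions path, with the system message included only when a system prompt was set and a bearer key read from $HOME/.config/openai/key if present. The type names and the example prompt are illustrative only.

	// chatsketch.go: minimal OpenAI-compatible chat completion request.
	package main

	import (
		"bytes"
		"encoding/json"
		"fmt"
		"net/http"
		"os"
		"path/filepath"
		"strings"
	)

	type message struct {
		Role    string `json:"role"`
		Content string `json:"content"`
	}

	type chat struct {
		Model    string    `json:"model"`
		Messages []message `json:"messages"`
	}

	func main() {
		baseURL := "http://127.0.0.1:8080" // new default: a local server
		sysPrompt := ""                    // new default: no system prompt

		// As in the patched code, only send a system message when one was set.
		var msgs []message
		if sysPrompt != "" {
			msgs = append(msgs, message{"system", sysPrompt})
		}
		msgs = append(msgs, message{"user", "Hello, world!"})

		body, err := json.Marshal(chat{Model: "ministral-8b-latest", Messages: msgs})
		if err != nil {
			panic(err)
		}
		req, err := http.NewRequest("POST", baseURL+"/v1/chat/completions", bytes.NewReader(body))
		if err != nil {
			panic(err)
		}
		req.Header.Set("Content-Type", "application/json")

		// Per llm.1, an API key (if any) lives at $HOME/.config/openai/key.
		if b, err := os.ReadFile(filepath.Join(os.Getenv("HOME"), ".config/openai/key")); err == nil {
			req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(b)))
		}

		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()

		// Standard chat completion response shape: choices[0].message.content.
		var reply struct {
			Choices []struct {
				Message message `json:"message"`
			} `json:"choices"`
		}
		if err := json.NewDecoder(resp.Body).Decode(&reply); err != nil {
			panic(err)
		}
		if len(reply.Choices) > 0 {
			fmt.Println(reply.Choices[0].Message.Content)
		}
	}

Dropping the old "You are a helpful assistant." default means requests to a local llama-server carry no system message at all unless -s is given, which is what lets precis pass its summarisation instruction through llm -s.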