Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.zerogpu.ai/llms.txt

Use this file to discover all available pages before exploring further.

Both examples submit 3 chat-completion requests, wait for completion, and parse the results. The examples assume two environment variables are set:
# Sent as the x-api-key and x-project-id headers on every request below.
export ZGPU_API_KEY="your-api-key"
export ZGPU_PROJECT_ID="your-project-uuid"
The examples also assume the production base URL, api.zerogpu.ai. Swap in staging.api.zerogpu.ai or dev.api.zerogpu.ai for non-prod work.

curl walkthrough

1. Build the input JSONL

# One JSON request per line. The quoted 'EOF' delimiter stops the shell from
# expanding anything inside the heredoc, so replace <model-id> by hand.
cat > input.jsonl <<'EOF'
{"custom_id":"q-1","method":"POST","url":"/v1/chat/completions","body":{"model":"<model-id>","messages":[{"role":"user","content":"What is the capital of France?"}]}}
{"custom_id":"q-2","method":"POST","url":"/v1/chat/completions","body":{"model":"<model-id>","messages":[{"role":"user","content":"What is the capital of Germany?"}]}}
{"custom_id":"q-3","method":"POST","url":"/v1/chat/completions","body":{"model":"<model-id>","messages":[{"role":"user","content":"What is the capital of Italy?"}]}}
EOF

2. Upload it

# Upload input.jsonl as a multipart form. In "file=@input.jsonl" the leading @
# tells curl to send the contents of that path as the "file" field.
UPLOAD=$(curl -s -X POST https://api.zerogpu.ai/v1/files \
  -H "x-api-key: $ZGPU_API_KEY" \
  -H "x-project-id: $ZGPU_PROJECT_ID" \
  -F purpose=batch \
  -F file=@input.jsonl)

echo "$UPLOAD"
# {
#   "id": "file-Abc123...",
#   "object": "file",
#   "bytes": 612,
#   "filename": "input.jsonl",
#   "purpose": "batch",
#   "status": "processed",
#   ...
# }

# The file ID is what the batch request refers to in the next step.
INPUT_FILE_ID=$(echo "$UPLOAD" | jq -r '.id')

3. Create the batch

# Build the request body with jq so the file ID is JSON-escaped for us,
# instead of splicing it into a hand-quoted string.
BATCH_BODY=$(jq -n --arg input_file_id "$INPUT_FILE_ID" '{
  input_file_id: $input_file_id,
  endpoint: "/v1/chat/completions",
  completion_window: "24h",
  metadata: { job: "capitals-demo" }
}')

BATCH=$(curl -s -X POST https://api.zerogpu.ai/v1/batches \
  -H "x-api-key: $ZGPU_API_KEY" \
  -H "x-project-id: $ZGPU_PROJECT_ID" \
  -H "content-type: application/json" \
  -d "$BATCH_BODY")

echo "$BATCH"
# {
#   "id": "batch_01HZX...",
#   "object": "batch",
#   "status": "in_progress",
#   "input_file_id": "file-Abc123...",
#   "request_counts": { "total": 3, "completed": 0, "failed": 0 },
#   ...
# }

# The batch ID is used to poll for status in the next step.
BATCH_ID=$(echo "$BATCH" | jq -r '.id')

4. Poll until complete

# Poll every 30 seconds until the batch reaches a terminal status, printing a
# one-line progress summary on each pass.
while true; do
  STATUS_JSON=$(curl -s "https://api.zerogpu.ai/v1/batches/$BATCH_ID" \
    -H "x-api-key: $ZGPU_API_KEY" \
    -H "x-project-id: $ZGPU_PROJECT_ID")
  STATUS=$(echo "$STATUS_JSON" | jq -r '.status')
  echo "$STATUS_JSON" | jq -c '{status, request_counts}'
  case "$STATUS" in
    # "cancelled" is terminal too; without it a cancelled batch is polled forever.
    completed|failed|expired|cancelled) break ;;
  esac
  sleep 30
done

echo "Final status: $STATUS"
# jq -r prints the literal string "null" when a file ID is absent.
OUTPUT_FILE_ID=$(echo "$STATUS_JSON" | jq -r '.output_file_id')
ERROR_FILE_ID=$(echo "$STATUS_JSON" | jq -r '.error_file_id')

5. Download the results

# Fetch one batch file and print it. $1 is the file ID taken from the batch
# object (the literal string "null" when the batch has no such file); $2 is
# the local path to save it to.
download_batch_file() {
  if [ "$1" = "null" ]; then
    return 0
  fi
  curl -s "https://api.zerogpu.ai/v1/files/$1/content" \
    -H "x-api-key: $ZGPU_API_KEY" \
    -H "x-project-id: $ZGPU_PROJECT_ID" \
    -o "$2"
  echo "--- $2 ---"
  cat "$2"
}

download_batch_file "$OUTPUT_FILE_ID" output.jsonl
download_batch_file "$ERROR_FILE_ID" errors.jsonl

6. Extract the answers

# Print "<custom_id>: <answer>" for each line of output.jsonl, taking the
# answer from the first choice of the chat-completion response body.
jq -r '"\(.custom_id): \(.response.body.choices[0].message.content)"' output.jsonl
# q-1: Paris.
# q-2: Berlin.
# q-3: Rome.

Python OpenAI SDK walkthrough

The ZeroGPU Batch API is wire-compatible with OpenAI’s API. You can use the official openai Python SDK by pointing it at the ZeroGPU base URL and supplying the ZeroGPU auth headers via default_headers.

Install

pip install openai

Full end-to-end script

import json
import os
import time
from openai import OpenAI

# Credentials are read from the environment; a missing variable raises KeyError.
API_KEY = os.environ["ZGPU_API_KEY"]
PROJECT_ID = os.environ["ZGPU_PROJECT_ID"]
MODEL_ID = "<model-id>"

# ZeroGPU authenticates via these two headers rather than the Authorization
# header, so they are attached to every request the client makes.
zgpu_auth_headers = {"x-api-key": API_KEY, "x-project-id": PROJECT_ID}

client = OpenAI(
    # The SDK requires `api_key` to be a string, but ZeroGPU ignores it, so
    # this value is effectively unused.
    api_key="ignored-by-zerogpu",
    base_url="https://api.zerogpu.ai/v1",
    default_headers=zgpu_auth_headers,
)

# 1. Build the input JSONL
# Each (custom_id, question) pair becomes one chat-completion request line.
questions = [
    ("q-1", "What is the capital of France?"),
    ("q-2", "What is the capital of Germany?"),
    ("q-3", "What is the capital of Italy?"),
]

with open("input.jsonl", "w") as f:
    f.writelines(
        json.dumps(
            {
                "custom_id": custom_id,
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL_ID,
                    "messages": [{"role": "user", "content": question}],
                },
            }
        )
        + "\n"
        for custom_id, question in questions
    )

# 2. Upload
# Open the file in a `with` block so the handle is closed as soon as the
# upload returns, instead of staying open for the rest of the script.
with open("input.jsonl", "rb") as input_file:
    uploaded = client.files.create(
        file=input_file,
        purpose="batch",
    )
print(f"Uploaded {uploaded.id} ({uploaded.bytes} bytes)")

# 3. Create batch
# The batch points at the uploaded file and names the endpoint every line targets.
batch_request = {
    "input_file_id": uploaded.id,
    "endpoint": "/v1/chat/completions",
    "completion_window": "24h",
    "metadata": {"job": "capitals-demo"},
}
batch = client.batches.create(**batch_request)
print(f"Created {batch.id}: status={batch.status}, total={batch.request_counts.total}")

# 4. Poll until terminal
# "cancelled" has to count as terminal as well; otherwise a batch stopped via
# client.batches.cancel() would be polled forever.
TERMINAL_STATUSES = ("completed", "failed", "expired", "cancelled")

while batch.status not in TERMINAL_STATUSES:
    time.sleep(30)
    # Re-fetch the batch to pick up the latest status and request counts.
    batch = client.batches.retrieve(batch.id)
    print(
        f"  status={batch.status}, "
        f"completed={batch.request_counts.completed}/{batch.request_counts.total}, "
        f"failed={batch.request_counts.failed}"
    )

print(f"Final status: {batch.status}")

# 5. Download output
# Either file may be absent, so each one is fetched only when the batch
# reports an ID for it. Output is handled before errors.
for file_id, local_path in (
    (batch.output_file_id, "output.jsonl"),
    (batch.error_file_id, "errors.jsonl"),
):
    if not file_id:
        continue
    remote_content = client.files.content(file_id)
    with open(local_path, "wb") as f:
        f.write(remote_content.read())

# 6. Parse and print results
def load_jsonl(path):
    """Parse a JSON Lines file into a list of decoded records.

    Args:
        path: Path of the file to read.

    Returns:
        One decoded JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; decoding with the platform default (for example
    # cp1252 on Windows) would corrupt or reject non-ASCII answers.
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]

# Successful requests: print the first choice's message text per custom_id.
if batch.output_file_id:
    for record in load_jsonl("output.jsonl"):
        request_id = record["custom_id"]
        first_choice = record["response"]["body"]["choices"][0]
        print(f"OK   {request_id}: {first_choice['message']['content']}")

# Failed requests: print the error code and message per custom_id.
if batch.error_file_id:
    for record in load_jsonl("errors.jsonl"):
        request_id = record["custom_id"]
        failure = record["error"]
        print(f"FAIL {request_id} [{failure['code']}]: {failure['message']}")

Sample output

Uploaded file-Abc123... (612 bytes)
Created batch_01HZX...: status=in_progress, total=3
  status=in_progress, completed=2/3, failed=0
  status=completed, completed=3/3, failed=0
Final status: completed
OK   q-1: Paris.
OK   q-2: Berlin.
OK   q-3: Rome.

SDK notes

  • api_key: The OpenAI SDK requires a non-empty api_key string. ZeroGPU ignores the Authorization header and reads x-api-key instead, so any string works. Use a placeholder like "ignored-by-zerogpu" to make the intent obvious.
  • base_url: Must include the /v1 suffix because the SDK appends paths like /files and /batches directly to the base URL.
  • default_headers: This is how the OpenAI SDK lets you attach custom headers to every request, exactly what’s needed for x-api-key and x-project-id.
  • client.batches.create(...): The SDK call accepts the same fields as the raw API: input_file_id, endpoint, completion_window, metadata.
  • client.files.content(file_id).read(): Returns the raw JSONL bytes. Iterate .iter_lines() if you prefer to stream rather than load it all.
  • client.batches.cancel(batch_id): Cancels an in-flight batch. See Batches API.

Worked example, bulk chat-completion batch

A more realistic example: send 1,000 prompts and collect the answers into a CSV.
import csv
import json
import os
import time
from openai import OpenAI

# ZeroGPU authenticates via these headers, so they ride along on every request.
# A missing environment variable raises KeyError here.
zgpu_auth_headers = {
    "x-api-key": os.environ["ZGPU_API_KEY"],
    "x-project-id": os.environ["ZGPU_PROJECT_ID"],
}

client = OpenAI(
    # Placeholder only: the SDK needs a string, ZeroGPU does not read it.
    api_key="ignored-by-zerogpu",
    base_url="https://api.zerogpu.ai/v1",
    default_headers=zgpu_auth_headers,
)

# Load your data, anything that yields (doc_id, prompt) tuples
prompts = load_my_dataset()  # 1000 records

# 1. Write JSONL
# One chat-completion request per record, keyed by doc_id so answers can be
# matched back to their source rows later.
with open("chat.jsonl", "w") as f:
    for doc_id, prompt in prompts:
        request = {
            "custom_id": doc_id,
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "<your-model-id>",
                "messages": [{"role": "user", "content": prompt}],
            },
        }
        print(json.dumps(request), file=f)

# 2-3. Upload + create
# The `with` block closes the file handle once the upload returns, instead of
# leaving it open for the rest of the script.
with open("chat.jsonl", "rb") as input_file:
    uploaded = client.files.create(file=input_file, purpose="batch")

batch = client.batches.create(
    input_file_id=uploaded.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)

# 4. Poll
# Check once a minute until the batch stops changing state.
finished_states = {"completed", "failed", "expired", "cancelled"}
while True:
    if batch.status in finished_states:
        break
    time.sleep(60)
    batch = client.batches.retrieve(batch.id)
    counts = batch.request_counts
    print(f"{batch.status}: {counts.completed}/{counts.total}")

# Anything other than a clean completion aborts the script.
if batch.status != "completed":
    raise SystemExit(f"Batch ended with status {batch.status}")

# 5. Download and parse
# output_file_id may be unset on the batch object; stop with a clear message
# rather than passing None to the files API.
if not batch.output_file_id:
    raise SystemExit("Batch completed but produced no output file")

output_bytes = client.files.content(batch.output_file_id).read()
results = {}
for raw in output_bytes.decode("utf-8").splitlines():
    if not raw.strip():
        continue  # skip blank lines, which json.loads would reject
    rec = json.loads(raw)
    # The answer is the message text of the first choice in the response body.
    answer = rec["response"]["body"]["choices"][0]["message"]["content"]
    results[rec["custom_id"]] = answer

# 6. Write CSV
# Write UTF-8 explicitly: model answers can contain non-ASCII text, which the
# platform default encoding may be unable to represent. newline="" lets the
# csv module control line endings itself.
with open("results.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["doc_id", "answer"])
    # Each (doc_id, answer) pair in `results` becomes one row.
    writer.writerows(results.items())

Next steps

JSONL format →

Exact line schema for input, output, and error files.

Supported endpoint →

Body and response shape for /v1/chat/completions.

Errors reference →

How to recover from failed lines without re-running the whole batch.