Chat completions

Chat-completions style inference

# POST a chat-completions request; the API key travels in the x-api-key header.
curl -X POST 'https://api.zerogpu.ai/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'x-api-key: <api-key>' \
  -d '
{
  "model": "llama-3.1-8b-instruct-fast",
  "messages": [
    {
      "role": "user",
      "content": "NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon."
    }
  ]
}
'

import requests

# Endpoint for chat-completions style inference.
url = "https://api.zerogpu.ai/v1/chat/completions"

# Request body: the model to run and the ordered list of conversation messages.
payload = {
    "model": "llama-3.1-8b-instruct-fast",
    "messages": [
        {
            "role": "user",
            "content": "NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon."
        }
    ]
}
# The API key is sent in the x-api-key header on every request.
headers = {
    "x-api-key": "<api-key>",
    "Content-Type": "application/json"
}

# requests waits indefinitely unless a timeout is given, so bound the wait
# explicitly; raise the value if the chosen model needs longer to respond.
response = requests.post(url, json=payload, headers=headers, timeout=60)

# Print the raw response body.
print(response.text)

// Endpoint for chat-completions style inference.
const endpoint = 'https://api.zerogpu.ai/v1/chat/completions';

// Request body: the model to run and the ordered list of conversation messages.
const payload = {
  model: 'llama-3.1-8b-instruct-fast',
  messages: [
    {
      role: 'user',
      content: 'NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon.'
    }
  ]
};

// The API key is sent in the x-api-key header.
const options = {
  method: 'POST',
  headers: {'x-api-key': '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify(payload)
};

// Send the request, log the parsed JSON response, and report any failure.
(async () => {
  try {
    const res = await fetch(endpoint, options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

false

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a chat-completions request to the ZeroGPU API and prints the raw
// response body to standard output.
//
// Every step that can fail is checked. The example panics on failure so that
// it needs no imports beyond the ones it already uses and still exits with a
// non-zero status.
func main() {
	url := "https://api.zerogpu.ai/v1/chat/completions"

	// The request body is a JSON document naming the model and the messages.
	payload := strings.NewReader("{\n  \"model\": \"llama-3.1-8b-instruct-fast\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon.\"\n    }\n  ]\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		panic(fmt.Errorf("building request: %w", err))
	}

	// The API key is sent in the x-api-key header.
	req.Header.Add("x-api-key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so it must not be dereferenced.
		panic(fmt.Errorf("sending request: %w", err))
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		panic(fmt.Errorf("reading response body: %w", err))
	}

	fmt.Println(string(body))
}

require 'uri'
require 'net/http'

# Endpoint for chat-completions style inference.
endpoint = URI("https://api.zerogpu.ai/v1/chat/completions")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port).tap { |conn| conn.use_ssl = true }

# POST request carrying the API key header and the JSON body
# (model name plus the conversation messages).
post = Net::HTTP::Post.new(endpoint)
post["x-api-key"] = '<api-key>'
post["Content-Type"] = 'application/json'
post.body = "{\n  \"model\": \"llama-3.1-8b-instruct-fast\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon.\"\n    }\n  ]\n}"

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "id": "chatcmpl_abc123",
  "object": "chat.completion",
  "created": 1710000000,
  "model": "llama-3.1-8b-instruct-fast",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Artemis III, slated for late 2026, will return astronauts to the Moon for the first time since 1972, landing two crew at the south pole to study shadowed craters that may hold water ice."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 112,
    "completion_tokens": 44,
    "total_tokens": 156
  }
}

{
  "error": {
    "code": "insufficient_quota",
    "message": "You have insufficient quota to complete this request."
  }
}

POST /v1/chat/completions

Chat-completions style inference

# POST a chat-completions request; the API key travels in the x-api-key header.
curl -X POST 'https://api.zerogpu.ai/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'x-api-key: <api-key>' \
  -d '
{
  "model": "llama-3.1-8b-instruct-fast",
  "messages": [
    {
      "role": "user",
      "content": "NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon."
    }
  ]
}
'

import requests

# Endpoint for chat-completions style inference.
url = "https://api.zerogpu.ai/v1/chat/completions"

# Request body: the model to run and the ordered list of conversation messages.
payload = {
    "model": "llama-3.1-8b-instruct-fast",
    "messages": [
        {
            "role": "user",
            "content": "NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon."
        }
    ]
}
# The API key is sent in the x-api-key header on every request.
headers = {
    "x-api-key": "<api-key>",
    "Content-Type": "application/json"
}

# requests waits indefinitely unless a timeout is given, so bound the wait
# explicitly; raise the value if the chosen model needs longer to respond.
response = requests.post(url, json=payload, headers=headers, timeout=60)

# Print the raw response body.
print(response.text)

// Endpoint for chat-completions style inference.
const endpoint = 'https://api.zerogpu.ai/v1/chat/completions';

// Request body: the model to run and the ordered list of conversation messages.
const payload = {
  model: 'llama-3.1-8b-instruct-fast',
  messages: [
    {
      role: 'user',
      content: 'NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon.'
    }
  ]
};

// The API key is sent in the x-api-key header.
const options = {
  method: 'POST',
  headers: {'x-api-key': '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify(payload)
};

// Send the request, log the parsed JSON response, and report any failure.
(async () => {
  try {
    const res = await fetch(endpoint, options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

false

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a chat-completions request to the ZeroGPU API and prints the raw
// response body to standard output.
//
// Every step that can fail is checked. The example panics on failure so that
// it needs no imports beyond the ones it already uses and still exits with a
// non-zero status.
func main() {
	url := "https://api.zerogpu.ai/v1/chat/completions"

	// The request body is a JSON document naming the model and the messages.
	payload := strings.NewReader("{\n  \"model\": \"llama-3.1-8b-instruct-fast\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon.\"\n    }\n  ]\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		panic(fmt.Errorf("building request: %w", err))
	}

	// The API key is sent in the x-api-key header.
	req.Header.Add("x-api-key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so it must not be dereferenced.
		panic(fmt.Errorf("sending request: %w", err))
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		panic(fmt.Errorf("reading response body: %w", err))
	}

	fmt.Println(string(body))
}

require 'uri'
require 'net/http'

# Endpoint for chat-completions style inference.
endpoint = URI("https://api.zerogpu.ai/v1/chat/completions")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port).tap { |conn| conn.use_ssl = true }

# POST request carrying the API key header and the JSON body
# (model name plus the conversation messages).
post = Net::HTTP::Post.new(endpoint)
post["x-api-key"] = '<api-key>'
post["Content-Type"] = 'application/json'
post.body = "{\n  \"model\": \"llama-3.1-8b-instruct-fast\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"NASA announced that its Artemis III mission is now scheduled for late 2026, marking the first time astronauts will land on the lunar surface since Apollo 17 in 1972. The mission will send a crew of four to the Moon aboard the Orion spacecraft, with two astronauts descending to the south pole using SpaceX Starship as a lunar lander. Scientists are particularly excited about exploring permanently shadowed craters that may contain water ice, which could be critical for sustaining long-term human presence on the Moon.\"\n    }\n  ]\n}"

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "id": "chatcmpl_abc123",
  "object": "chat.completion",
  "created": 1710000000,
  "model": "llama-3.1-8b-instruct-fast",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Artemis III, slated for late 2026, will return astronauts to the Moon for the first time since 1972, landing two crew at the south pole to study shadowed craters that may hold water ice."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 112,
    "completion_tokens": 44,
    "total_tokens": 156
  }
}

{
  "error": {
    "code": "insufficient_quota",
    "message": "You have insufficient quota to complete this request."
  }
}

Some models are invoked with a chat-completions style body (a messages array) instead of the Responses input field. The dashboard and model catalog indicate which route applies to each model. Each model page has its own playground with the right examples for that model. Install the official SDK from npm or PyPI (pip install zerogpu-api). Source: zerogpu/SDK. The response JSON shape depends on the model; handle errors as described under API error codes.

Authorizations

x-api-key

string

header

required

Your ZeroGPU API key. Create one in the dashboard under API keys. Send it on every request.

Headers

x-project-id

string

Optional project identifier. Scopes the request to a specific project when provided.

Body

application/json

model

enum<string>

required

Model identifier. Open a model page for a dedicated playground with the correct body for that model.

Available options:

llama-3.1-8b-instruct-fast,

LFM2.5-1.2B-Instruct,

LFM2.5-1.2B-Thinking,

gpt-oss-120b,

qwen3-30b-a3b-fp8,

deberta-v3-small,

gliner2-base-v1,

gliner-multi-pii-v1,

zlm-v1-followup-questions-edge,

zlm-v1-iab-classify-edge,

zlm-v2-iab-classify-edge-enriched,

zlm-v1-iab-domain-classifier

Example:

"llama-3.1-8b-instruct-fast"

messages

object[]

required

Ordered list of messages making up the conversation so far.

Minimum array length: 1

Hide child attributes

messages.role

enum<string>

required

Role of the message author.

Available options:

system,

user,

assistant

messages.content

string<textarea>

required

The message text.

Required string length: 1 - 131072

metadata

object

Optional model-specific parameters, passed through to the model. For example, PII models accept mask and usecase. See the relevant model page for supported keys.

Response

Success

An OpenAI-compatible chat completion.

id

string

Unique identifier for the completion.

Example:

"chatcmpl_abc123"

object

string

Object type. Always chat.completion.

Example:

"chat.completion"

created

integer

Unix timestamp (seconds) when the completion was created.

Example:

1710000000

model

string

The model used for inference.

Example:

"llama-3.1-8b-instruct-fast"

choices

object[]

List of completion choices.

Hide child attributes

choices.index

integer

Position of this choice in the list.

Example:

0

choices.message

object

Hide child attributes

choices.message.role

enum<string>

required

Role of the message author.

Available options:

system,

user,

assistant

choices.message.content

string<textarea>

required

The message text.

Required string length: 1 - 131072

choices.finish_reason

string

Why the model stopped generating, for example stop or length.

Example:

"stop"

usage

object

Token usage statistics for the request.

Hide child attributes

usage.prompt_tokens

integer

Number of tokens in the prompt.

Example:

112

usage.completion_tokens

integer

Number of tokens in the generated completion.

Example:

44

usage.total_tokens

integer

Total tokens consumed (prompt plus completion).

Example:

156

Responses gpt-oss-120b

⌘I

Endpoints

ZeroClick

Batch API

Chat completions

Authorizations

Headers

Body

Response