Generate text completions with streaming support.
This endpoint generates text completions using specified language models. Supports both streaming (real-time response chunks) and non-streaming modes. Ideal for text generation, continuation, and completion tasks.
Request Body:
- model (required): Model ID for text generation
- prompt (required): Input to complete. Can be a single string, a list of strings, an array of token IDs, or an array of token-ID arrays (a pre-tokenized prompt).
- max_tokens (optional): Maximum tokens to generate (default: 16, minimum: 0)
- temperature (optional): Sampling temperature between 0 and 2 (default: 1). Higher values like 0.8 make output more random; lower values like 0.2 make it more focused and deterministic.
- stream (optional): Enable streaming responses (default: false)

Optional Headers:
- X-Session-ID: Session identifier for conversation tracking

Returns:
Non-streaming mode -
- id: Unique identifier for the response
- object: Object type (always text_completion)
- created: Unix timestamp when the response was generated
- model: Model ID used for the completion
- choices: List of completion choices, each containing:
  - text: Generated completion text
  - index: Index of this choice
  - finish_reason: Reason the model stopped (stop for a natural stop point, length if the max token limit was reached)
- usage: Token usage statistics:
  - prompt_tokens: Number of tokens in the prompt
  - completion_tokens: Number of tokens generated
  - total_tokens: Total tokens used (prompt + completion)
- confidence_score (optional): Confidence score from Domain-Aligned AI models

Streaming mode - Server-sent events (SSE) stream with StreamingCompletionResponsev2 chunks containing:
- id: Response identifier
- object: Always text_completion
- created: Timestamp
- model: Model ID
- choices: Completion chunks
- usage (optional): Only in the final chunk
- confidence_score (optional): Only in the final chunk

Example Request (Non-streaming):
POST /api/v3/inference/completions
Headers: {"Authorization": "Bearer <api_key>"}
{
"model": "nugen-flash-instruct",
"prompt": "Write a haiku about programming:",
"max_tokens": 100,
"temperature": 0.7
}
Example Response (Non-streaming):
{
"id": "nugen-abc123",
"object": "text_completion",
"created": 1704123600.0,
"model": "nugen-flash-instruct",
"choices": [
{
"text": "Code flows like water,
Bugs emerge, then disappear,
Debug and refine.",
"index": 0,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 7,
"completion_tokens": 18,
"total_tokens": 25
},
"confidence_score": 86.5221
}
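The non-streaming response above can be unpacked with a small helper. This is an illustrative sketch (the function name is ours, not part of an official client library); the field names mirror the documented response shape.

```python
# Unpack the non-streaming completion response documented above.
# extract_completion is a hypothetical helper, not an official client API.

def extract_completion(response: dict) -> dict:
    """Return the first choice's text plus usage stats from a completion response."""
    choice = response["choices"][0]
    usage = response.get("usage", {})
    return {
        "text": choice["text"],
        "finish_reason": choice["finish_reason"],
        "total_tokens": usage.get("total_tokens"),
        # confidence_score is only present for Domain-Aligned AI models
        "confidence_score": response.get("confidence_score"),
    }
```

Checking `finish_reason` distinguishes a natural stop (`stop`) from a truncated completion (`length`), which signals that max_tokens should be raised.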
Example Request (Streaming):
POST /api/v3/inference/completions
Headers: {"Authorization": "Bearer <api_key>"}
{
"model": "nugen-flash-instruct",
"prompt": "Write a haiku about programming:",
"max_tokens": 100,
"temperature": 0.7,
"stream": true
}
Example Response (Streaming):
data: {"id":"nugen-abc123","object":"text_completion","created":1704123600.0,"model":"nugen-flash-instruct","choices":[{"text":"Code","index":0,"finish_reason":null}]}
data: {"id":"nugen-abc123","object":"text_completion","created":1704123600.0,"model":"nugen-flash-instruct","choices":[{"text":" flows","index":0,"finish_reason":null}]}
data: {"id":"nugen-abc123","object":"text_completion","created":1704123600.0,"model":"nugen-flash-instruct","choices":[{"text":" like water","index":0,"finish_reason":"stop"}],"usage":{"prompt_tokens":7,"completion_tokens":18,"total_tokens":25},"confidence_score":86.5221}
data: [DONE]
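A client consuming this stream decodes each `data:` line as JSON and stops at the `[DONE]` sentinel. A minimal parsing sketch (the function names are illustrative, not part of an official client):

```python
import json

def parse_sse_lines(lines):
    """Yield decoded chunk dicts from an iterable of SSE lines.

    Lines that do not start with "data: " (blank keep-alives, comments)
    are skipped; the "[DONE]" sentinel terminates the stream.
    """
    for line in lines:
        line = line.strip()
        if not line.startswith("data: "):
            continue
        payload = line[len("data: "):]
        if payload == "[DONE]":
            break
        yield json.loads(payload)

def assemble_text(lines):
    """Concatenate the text deltas of choice 0 across all streamed chunks."""
    return "".join(chunk["choices"][0]["text"] for chunk in parse_sse_lines(lines))
```

Note that `usage` and `confidence_score` appear only on the final chunk, so a client that needs token accounting must keep the last decoded chunk rather than only the concatenated text.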
Example Request (Tokenized Prompt):
POST /api/v3/inference/completions
Headers: {"Authorization": "Bearer <api_key>"}
{
"model": "nugen-flash-instruct",
"prompt": [1014, 6766, 318],
"max_tokens": 50,
"temperature": 1
}
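Because prompt accepts several shapes (a string, a list of strings, a list of token IDs, or a list of token-ID lists), request bodies for all three examples above can be assembled uniformly. A sketch, with defaults taken from the parameter list in this document (`build_payload` is a hypothetical helper):

```python
# Assemble a request body for POST /api/v3/inference/completions.
# The prompt may be a string, a list of strings, a list of token IDs, or a
# list of token-ID lists (batched, pre-tokenized prompts). Default values
# mirror those documented above; build_payload is an illustrative helper.

def build_payload(model, prompt, max_tokens=16, temperature=1, stream=False):
    """Build the JSON request body for the completions endpoint."""
    return {
        "model": model,
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": stream,
    }
```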
Notes:
- The prompt parameter accepts pre-tokenized inputs for advanced use cases
- The default max_tokens is 16 - adjust it based on your needs
- confidence_score is only available for Domain-Aligned AI models
- Use the X-Session-ID header for multi-turn conversation tracking
- Authenticate with a Bearer header of the form Bearer <token>, where <token> is your auth token
model (string, required)
The name of the model to use.
Example: "nugen-flash-instruct"

prompt (string | string[] | integer[] | integer[][], required)
The prompt to generate completions for. It can be a single string or a list of strings. It can also be an array of integers or an array of integer arrays, which allows passing an already tokenized prompt.
Example: "The sky is"

max_tokens (integer, optional)
The maximum number of tokens to generate in the completion.
Constraint: x >= 0. Default: 16.

temperature (number, optional)
What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
Constraint: 0 <= x <= 2. Default: 1.

stream (boolean, optional)
Whether to stream back partial progress as server-sent events.

Returns: streaming text completion responses or a complete response, depending on the stream parameter.