Generate conversational responses with streaming support and multimodal capabilities.
This endpoint generates chat completions using language models, supporting both text-only conversations and vision/multimodal inputs. It also supports function calling, streaming responses, and automatic conversation tracking.
Request Body:
- model (required): Model ID - e.g., nugen-flash-instruct; use a vision model for multimodal input
- messages (required, minimum 1): Array of message objects, each containing:
  - role: Message role (system, user, or assistant)
  - content: Text string OR array of content objects (for multimodal):
    - {"type": "text", "text": "your message"}
    - {"type": "image_url", "image_url": {"url": "https://..."}}
    - {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}
  - name (optional): Author name (a-z, A-Z, 0-9, underscores, max 64 chars)
- max_tokens (optional): Maximum number of tokens to generate in the completion
- prompt_truncate_len (optional): Size to which to truncate chat prompts (default: 1500)
- temperature (optional): Sampling temperature between 0 and 2 (default: 1)
- stream (optional): Enable streaming responses (default: false)
- tools (optional): List of tools/functions available to the model
- tool_choice (optional): Tool selection mode (auto, none, or a specific tool)
- top_p (optional): Nucleus sampling parameter
- top_k (optional): Top-k sampling parameter
- n (optional): Number of completions to generate (default: 1)
- reasoning (optional): Reasoning configuration object with:
  - effort: Reasoning effort level (xhigh, high, medium, low, minimal, none)
  - max_tokens: Token limit for reasoning
  - exclude: Set true to exclude reasoning tokens from the response
  - enabled: Enable reasoning with default parameters
Optional Headers:
- X-Session-ID: Session identifier for multi-turn conversation tracking
Returns:
Non-streaming mode - complete response with:
- id: Unique identifier for the response
- object: Object type (always chat.completion)
- created: Unix timestamp when the completion was created
- model: Model used for the chat completion
- choices: List of completion choices, each containing:
  - index: Index of the choice
  - message: Response message with:
    - role: Role of the author (always assistant)
    - content: Generated response text
    - tool_calls (optional): Tool calls made by the model (for function calling)
  - finish_reason: Reason the model stopped (stop for a natural stop, length if max tokens was reached)
- usage: Token usage statistics:
  - prompt_tokens: Number of tokens in the prompt
  - completion_tokens: Number of tokens generated
  - total_tokens: Total tokens used (prompt + completion)
- confidence_score (optional): Confidence score from Domain-Aligned AI models
Streaming mode - ChatCompletionChunk stream with:
- id: Unique identifier
- created: Timestamp
- model: Model ID
- choices: List of chunk choices with:
  - index: Choice index
  - delta: Delta content with role and content
  - finish_reason: Reason for stopping (only in the final chunk)
- usage (optional): Only present in the final chunk
Example Request (Text Chat):
POST /api/v3/inference/chat/completions
Headers: {"Authorization": "Bearer <api_key>"}
{
"model": "nugen-flash-instruct",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is the capital of France?"
}
],
"max_tokens": 500,
"temperature": 0.7
}
Example Response (Text Chat):
{
"id": "nugen-abc123",
"object": "chat.completion",
"created": 1704123600.0,
"model": "nugen-flash-instruct",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "The capital of France is Paris."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 20,
"completion_tokens": 8,
"total_tokens": 28
},
"confidence_score": 89.5454
}
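The request body above can be assembled programmatically. A minimal sketch in Python; the build_chat_payload helper is hypothetical (not part of any SDK) and simply mirrors the Request Body fields documented above:

```python
import json

def build_chat_payload(model, messages, **options):
    """Assemble a chat-completions request body from the documented
    fields (hypothetical helper, not an official SDK)."""
    payload = {"model": model, "messages": messages}
    payload.update(options)  # max_tokens, temperature, stream, tools, ...
    return payload

payload = build_chat_payload(
    "nugen-flash-instruct",
    [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ],
    max_tokens=500,
    temperature=0.7,
)
print(json.dumps(payload, indent=2))
```

POST this body to /api/v3/inference/chat/completions with the Authorization: Bearer <api_key> header shown above.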
Example Request (Vision - Image URL):
POST /api/v3/inference/chat/completions
Headers: {"Authorization": "Bearer <api_key>"}
{
"model": "qwen3-vl-30b",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe what you see in this image."
},
{
"type": "image_url",
"image_url": {
"url": "https://example.com/image.jpg"
}
}
]
}
],
"max_tokens": 1000
}
Example Request (Vision - Base64 Image):
POST /api/v3/inference/chat/completions
Headers: {"Authorization": "Bearer <api_key>"}
{
"model": "qwen3-vl-30b",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What's in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD..."
}
}
]
}
]
}
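Base64 data URLs like the one above can be produced from raw image bytes. A minimal sketch (the helper name is illustrative):

```python
import base64

def image_bytes_to_data_url(data: bytes, mime: str = "image/jpeg") -> str:
    """Encode raw image bytes as a data URL suitable for the
    image_url.url field (illustrative helper)."""
    return f"data:{mime};base64," + base64.b64encode(data).decode("ascii")

# JPEG files start with the bytes FF D8 FF; a real call would use
# open("photo.jpg", "rb").read() instead of this stub.
url = image_bytes_to_data_url(b"\xff\xd8\xff\xe0", "image/jpeg")
print(url)  # data:image/jpeg;base64,/9j/4A==
```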
Example Request (Function Calling):
POST /api/v3/inference/chat/completions
Headers: {"Authorization": "Bearer <api_key>"}
{
"model": "nugen-flash-instruct",
"messages": [
{"role": "user", "content": "What's the weather in Boston?"}
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string"}
}
}
}
}
],
"tool_choice": "auto"
}
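When the model responds with tool_calls instead of text, the function name and its JSON-encoded arguments need decoding before dispatch. A sketch under the assumption that arguments arrive as a JSON string; the sample response fragment below is illustrative, not captured output:

```python
import json

def extract_tool_calls(response: dict):
    """Return (name, arguments) pairs from a non-streaming response.
    Assumes each tool call's arguments field is a JSON-encoded string."""
    calls = []
    for choice in response.get("choices", []):
        for call in choice.get("message", {}).get("tool_calls") or []:
            fn = call["function"]
            calls.append((fn["name"], json.loads(fn["arguments"])))
    return calls

# Illustrative response fragment for the weather request above.
sample = {
    "choices": [{
        "index": 0,
        "message": {
            "role": "assistant",
            "content": None,
            "tool_calls": [{
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": "{\"location\": \"Boston\"}",
                },
            }],
        },
        "finish_reason": "stop",
    }]
}
print(extract_tool_calls(sample))  # [('get_weather', {'location': 'Boston'})]
```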
Example Request (Streaming):
POST /api/v3/inference/chat/completions
Headers: {"Authorization": "Bearer <api_key>"}
{
"model": "nugen-flash-instruct",
"messages": [{"role": "user", "content": "Tell me a story"}],
"stream": true
}
Example Response (Streaming):
data: {"id":"nugen-abc123","created":1704123600.0,"model":"nugen-flash-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":"Once"},"finish_reason":null}]}
data: {"id":"nugen-abc123","created":1704123600.0,"model":"nugen-flash-instruct","choices":[{"index":0,"delta":{"content":" upon"},"finish_reason":null}]}
data: {"id":"nugen-abc123","created":1704123600.0,"model":"nugen-flash-instruct","choices":[{"index":0,"delta":{"content":" a time"},"finish_reason":"stop"}],"usage":{"prompt_tokens":10,"completion_tokens":5,"total_tokens":15}}
data: [DONE]
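The stream arrives as server-sent events, one data: line per chunk, terminated by data: [DONE]; reassembling the text means concatenating each chunk's delta.content. A minimal parser sketch over the example chunks above (abbreviated):

```python
import json

def parse_sse_line(line: str):
    """Decode one SSE line; return the chunk dict, or None for
    [DONE] and non-data lines."""
    if not line.startswith("data: "):
        return None
    body = line[len("data: "):].strip()
    if body == "[DONE]":
        return None
    return json.loads(body)

# The chunks from the example streaming response, abbreviated.
lines = [
    'data: {"id":"nugen-abc123","choices":[{"index":0,"delta":{"role":"assistant","content":"Once"},"finish_reason":null}]}',
    'data: {"id":"nugen-abc123","choices":[{"index":0,"delta":{"content":" upon"},"finish_reason":null}]}',
    'data: {"id":"nugen-abc123","choices":[{"index":0,"delta":{"content":" a time"},"finish_reason":"stop"}]}',
    'data: [DONE]',
]
text = ""
for line in lines:
    chunk = parse_sse_line(line)
    if chunk is not None:
        text += chunk["choices"][0]["delta"].get("content", "")
print(text)  # Once upon a time
```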
Notes:
- Function calling is supported via the tools and tool_choice parameters
- confidence_score is only available for Domain-Aligned AI models
- Use the X-Session-ID header for multi-turn conversation tracking
- Use prompt_truncate_len to control context window usage for long conversations
- The reasoning parameter enables advanced reasoning capabilities for supported models
- Authenticate with a Bearer authentication header of the form Bearer <token>, where <token> is your auth token