Retrieve the complete results of a finished evaluation.
This endpoint returns detailed evaluation metrics and scores for a completed evaluation. It works for both single-model and comparison-mode evaluations.
Path Parameters:
- evaluation_id: Unique evaluation identifier

Returns:
- evaluation_id: The evaluation identifier
- model_id: Primary model that was evaluated
- benchmark_id: Benchmark that was used
- status: Evaluation status (should be completed)
- raw_answers_count: Number of raw answers generated during evaluation
- completed_at: ISO timestamp when evaluation finished
- method (optional): Evaluation method (eval for single model, eval-compare for comparison)
- metrics (optional): Evaluation metrics and scores (single model only)
- model_id_2 (optional): Second model ID (comparison mode only)
- base_model (optional): Base model results (comparison mode only)
- eval_model (optional): Eval model results (comparison mode only)
- comparison (optional): Comparison results between models (comparison mode only)

Raises:
- 404: If the evaluation is not found or does not belong to the authenticated user
- 400: If the evaluation is not yet completed

Example Request:
GET /api/v3/evaluations/eval-xyz789/results
Headers: {"Authorization": "Bearer <api_key>"}
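The request above can be sketched in Python with the standard library. The base URL here is a placeholder, not part of the documented API; substitute your deployment's host.

```python
import json
from urllib import request

# Hypothetical base URL -- replace with your actual API host.
BASE_URL = "https://api.example.com/api/v3"

def build_results_request(evaluation_id: str, api_key: str) -> request.Request:
    """Build the GET request for a completed evaluation's results."""
    url = f"{BASE_URL}/evaluations/{evaluation_id}/results"
    return request.Request(url, headers={"Authorization": f"Bearer {api_key}"})

req = build_results_request("eval-xyz789", "my-api-key")
# results = json.loads(request.urlopen(req).read())  # live network call, not executed here
```

Building the request separately from sending it keeps the URL and auth-header construction easy to verify without a live server.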
Example Response (Single Model):
{
  "evaluation_id": "eval-xyz789",
  "model_id": "nugen-flash-instruct",
  "benchmark_id": "task-abc123",
  "status": "completed",
  "method": "eval",
  "raw_answers_count": 10,
  "completed_at": "2024-01-15T10:45:00Z",
  "metrics": {
    "accuracy": 0.92,
    "relevance": 0.88,
    "average_score": 0.90,
    "total_questions": 10,
    "correct_answers": 9
  }
}
Example Response (Comparison Mode):
{
  "evaluation_id": "eval-xyz789",
  "model_id": "nugen-flash-instruct",
  "model_id_2": "gpt-4",
  "benchmark_id": "task-abc123",
  "status": "completed",
  "method": "eval-compare",
  "raw_answers_count": 20,
  "completed_at": "2024-01-15T10:45:00Z",
  "base_model": {
    "model_id": "nugen-flash-instruct",
    "average_score": 0.92,
    "total_questions": 10
  },
  "eval_model": {
    "model_id": "gpt-4",
    "average_score": 0.85,
    "total_questions": 10
  },
  "comparison": {
    "winner": "nugen-flash-instruct",
    "score_difference": 0.07,
    "statistical_significance": true
  }
}
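Since the two response shapes share a top-level method field, a client can branch on it to handle either form. A minimal sketch using the documented field names (the summarize_results helper itself is illustrative, not part of the API):

```python
def summarize_results(results: dict) -> str:
    """Return a one-line summary for either response shape,
    dispatching on the documented `method` field."""
    if results.get("method") == "eval-compare":
        comp = results["comparison"]
        return f"winner: {comp['winner']} (+{comp['score_difference']:.2f})"
    metrics = results["metrics"]
    return f"average score: {metrics['average_score']:.2f}"

single = {"method": "eval", "metrics": {"average_score": 0.90}}
compare = {"method": "eval-compare",
           "comparison": {"winner": "nugen-flash-instruct",
                          "score_difference": 0.07}}
print(summarize_results(single))   # average score: 0.90
print(summarize_results(compare))  # winner: nugen-flash-instruct (+0.07)
```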
Notes:
- Single model evaluations include the metrics field for results
- Comparison evaluations include the base_model, eval_model, and comparison fields instead
- The method field indicates whether it is a single model (eval) or comparison (eval-compare) evaluation
- Use /evaluations/{evaluation_id}/status first to check whether the evaluation is complete
- A download endpoint is also available at /evaluations/{evaluation_id}/download
- Authentication uses a Bearer header of the form Bearer <token>, where <token> is your auth token
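The recommended flow of checking /evaluations/{evaluation_id}/status before fetching results can be sketched as a polling loop. The fetch callable is injected (any function that GETs an API path and returns the decoded JSON body) so the logic is testable without a live server; the field names follow the documented responses, but the helper itself is an assumption, not part of the API.

```python
import time
from typing import Callable

def wait_for_results(evaluation_id: str,
                     fetch: Callable[[str], dict],
                     poll_seconds: float = 5.0) -> dict:
    """Poll the status endpoint until the evaluation completes,
    then fetch and return its full results."""
    while True:
        status = fetch(f"/evaluations/{evaluation_id}/status")
        if status.get("status") == "completed":
            return fetch(f"/evaluations/{evaluation_id}/results")
        time.sleep(poll_seconds)
```

A 400 from the results endpoint means the evaluation is still running, which is exactly what this loop avoids by waiting for status "completed" first.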