Streaming Guide

Streaming lets you receive responses from HelpingAI in real time as they're generated, rather than waiting for the complete response. This makes the experience more interactive and responsive, especially for longer outputs.

How Streaming Works

When you set stream: true in your request, HelpingAI responds with Server-Sent Events (SSE). Each event is a line beginning with data: followed by a JSON chunk containing a piece of the response as it's being generated, and the stream ends with the sentinel data: [DONE].

Basic Streaming

Python (using requests)

Python
import requests
import json

url = "https://api.helpingai.co/v1/chat/completions"
headers = {
    "Authorization": "Bearer YOUR_API_KEY",
    "Content-Type": "application/json"
}
data = {
    "model": "Dhanishtha-2.0-preview",
    "messages": [
        {"role": "user", "content": "Tell me a story about a brave knight"}
    ],
    "stream": True,
    "temperature": 0.8,
    "max_tokens": 500
}

response = requests.post(url, headers=headers, json=data, stream=True)
response.raise_for_status()  # Fail fast on HTTP errors before reading the stream

for line in response.iter_lines():
    if line:
        line = line.decode('utf-8')
        if line.startswith('data: '):
            data_str = line[6:]  # Remove 'data: ' prefix
            if data_str == '[DONE]':
                break
            try:
                chunk = json.loads(data_str)
                if chunk['choices'][0]['delta'].get('content'):
                    print(chunk['choices'][0]['delta']['content'], end='', flush=True)
            except json.JSONDecodeError:
                continue

Python (using OpenAI SDK)

Python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "user", "content": "Tell me a story about a brave knight"}
    ],
    stream=True,
    temperature=0.8,
    max_tokens=500
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)

Python (using HelpingAI SDK)

Python
from helpingai import HelpingAI

client = HelpingAI(api_key="YOUR_API_KEY")

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "user", "content": "Tell me a story about a brave knight"}
    ],
    stream=True,
    temperature=0.8,
    max_tokens=500
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)

JavaScript (using axios)

JavaScript
const axios = require('axios');

async function streamResponse() {
  const response = await axios.post(
    'https://api.helpingai.co/v1/chat/completions',
    {
      model: 'Dhanishtha-2.0-preview',
      messages: [
        {role: 'user', content: 'Tell me a story about a brave knight'}
      ],
      stream: true,
      temperature: 0.8,
      max_tokens: 500
    },
    {
      headers: {
        'Authorization': 'Bearer YOUR_API_KEY',
        'Content-Type': 'application/json'
      },
      responseType: 'stream'
    }
  );

  let buffered = '';
  response.data.on('data', (chunk) => {
    // SSE lines can be split across network chunks, so buffer incomplete lines
    buffered += chunk.toString();
    const lines = buffered.split('\n');
    buffered = lines.pop(); // keep the trailing partial line for the next chunk

    for (const line of lines) {
      if (line.startsWith('data: ')) {
        const data = line.slice(6);
        if (data === '[DONE]') {
          return;
        }

        try {
          const parsed = JSON.parse(data);
          if (parsed.choices[0].delta.content) {
            process.stdout.write(parsed.choices[0].delta.content);
          }
        } catch (error) {
          // Skip lines that are not valid JSON
        }
      }
    }
  });
}

streamResponse();

JavaScript (using OpenAI package)

JavaScript
import OpenAI from 'openai';

const openai = new OpenAI({
  baseURL: 'https://api.helpingai.co/v1',
  apiKey: 'YOUR_API_KEY'
});

async function main() {
  const stream = await openai.chat.completions.create({
    model: 'Dhanishtha-2.0-preview',
    messages: [
      {role: 'user', content: 'Tell me a story about a brave knight'}
    ],
    stream: true,
    temperature: 0.8,
    max_tokens: 500
  });

  for await (const chunk of stream) {
    if (chunk.choices[0]?.delta?.content) {
      process.stdout.write(chunk.choices[0].delta.content);
    }
  }
}

main();

JavaScript (using HelpingAI SDK)

JavaScript
import { HelpingAI } from 'helpingai';

const client = new HelpingAI({
  apiKey: 'YOUR_API_KEY'
});

async function main() {
  const stream = await client.chat.completions.create({
    model: 'Dhanishtha-2.0-preview',
    messages: [
      {role: 'user', content: 'Tell me a story about a brave knight'}
    ],
    stream: true,
    temperature: 0.8,
    max_tokens: 500
  });

  for await (const chunk of stream) {
    if (chunk.choices[0]?.delta?.content) {
      process.stdout.write(chunk.choices[0].delta.content);
    }
  }
}

main();

Stream Response Format

Each streaming chunk follows this format:

Text
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{"content":"Once"},"finish_reason":null}]}

data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{"content":" upon"},"finish_reason":null}]}

data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{"content":" a"},"finish_reason":null}]}

data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}

data: [DONE]

Chunk Structure

Field     Type      Description
id        string    Unique identifier for the completion
object    string    Always "chat.completion.chunk"
created   integer   Unix timestamp
model     string    Model used
choices   array     Array of choice objects

Choice Object (Streaming)

Field          Type     Description
index          integer  Choice index
delta          object   Content delta for this chunk
finish_reason  string   Reason for completion (null until the final chunk)

Delta Object

Field    Type    Description
content  string  Partial content for this chunk
role     string  Role (only in the first chunk)
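
Because each delta carries only a fragment, clients typically accumulate the deltas to rebuild the complete assistant message. A minimal sketch, assuming a stream created with the OpenAI SDK as in the examples above:

Python
message = {"role": None, "content": ""}

for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.role:        # the role arrives only in the first chunk
        message["role"] = delta.role
    if delta.content:     # subsequent chunks carry content fragments
        message["content"] += delta.content

print(message)  # e.g. {'role': 'assistant', 'content': 'Once upon a ...'}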

Advanced Streaming Examples

With Emotional Context

Python (using OpenAI SDK)

Python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "system", "content": "You are a compassionate counselor."},
        {"role": "user", "content": "I'm feeling really anxious about my job interview tomorrow."}
    ],
    stream=True,
    temperature=0.7,
    max_tokens=300
)

print("AI Response: ", end="")
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()  # New line at the end

With Chain of Recursive Thoughts

Python (using OpenAI SDK)

Python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "user", "content": "Solve this step by step: If a train travels 120 miles in 2 hours, what's its speed?"}
    ],
    stream=True,
    extra_body={"hideThink": False},  # Show reasoning; the OpenAI SDK rejects unknown keyword arguments, so pass extra params via extra_body
    temperature=0.3,
    max_tokens=400
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
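
With the reasoning visible, the thought process streams interleaved with the final answer. If you only want the answer, one option is to buffer the stream and strip the reasoning afterward; filtering on the fly is harder because a tag can be split across chunk boundaries. A minimal sketch, assuming the reasoning is wrapped in <think>...</think> tags (verify the exact delimiters your model emits):

Python
import re

full_text = ""
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        full_text += chunk.choices[0].delta.content

# Assumed delimiters: remove <think>...</think> blocks after the stream ends
answer = re.sub(r"<think>.*?</think>", "", full_text, flags=re.DOTALL).strip()
print(answer)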

Error Handling in Streaming

Python Example

Python
from openai import OpenAI
import json

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

try:
    stream = client.chat.completions.create(
        model="Dhanishtha-2.0-preview",
        messages=[
            {"role": "user", "content": "Hello!"}
        ],
        stream=True
    )
    
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="", flush=True)
            
except Exception as e:
    print(f"Streaming error: {e}")
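
For finer-grained handling, the OpenAI Python SDK (v1+) raises typed exceptions you can catch separately. A sketch, assuming the same client setup as above:

Python
import openai

try:
    stream = client.chat.completions.create(
        model="Dhanishtha-2.0-preview",
        messages=[{"role": "user", "content": "Hello!"}],
        stream=True
    )
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="", flush=True)
except openai.APIConnectionError as e:
    print(f"Network problem: {e}")           # connection dropped or DNS failure
except openai.RateLimitError as e:
    print(f"Rate limited, retry later: {e}")
except openai.APIStatusError as e:
    print(f"API returned {e.status_code}")   # any other non-2xx response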

JavaScript Example

JavaScript
import OpenAI from 'openai';

const openai = new OpenAI({
  baseURL: 'https://api.helpingai.co/v1',
  apiKey: 'YOUR_API_KEY'
});

async function streamWithErrorHandling() {
  try {
    const stream = await openai.chat.completions.create({
      model: 'Dhanishtha-2.0-preview',
      messages: [
        {role: 'user', content: 'Hello!'}
      ],
      stream: true
    });

    for await (const chunk of stream) {
      if (chunk.choices[0]?.delta?.content) {
        process.stdout.write(chunk.choices[0].delta.content);
      }
    }
  } catch (error) {
    console.error('Streaming error:', error);
  }
}

streamWithErrorHandling();

Best Practices

1. Handle Connection Issues

Always implement retry logic for transient network failures. Note that a retry restarts the stream from the beginning, so be prepared to deduplicate output you have already shown:

Python
import time
from openai import OpenAI

def stream_with_retry(client, messages, max_retries=3):
    for attempt in range(max_retries):
        try:
            stream = client.chat.completions.create(
                model="Dhanishtha-2.0-preview",
                messages=messages,
                stream=True
            )
            
            for chunk in stream:
                if chunk.choices[0].delta.content is not None:
                    print(chunk.choices[0].delta.content, end="", flush=True)
            break
            
        except Exception:
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, 4s...
                continue
            raise  # Re-raise with the original traceback after the last attempt

2. Buffer Partial Responses

For UI applications, consider buffering chunks:

JavaScript
class StreamBuffer {
  constructor() {
    this.buffer = '';
    this.callbacks = [];
  }
  
  addChunk(content) {
    this.buffer += content;
    this.callbacks.forEach(callback => callback(this.buffer));
  }
  
  onUpdate(callback) {
    this.callbacks.push(callback);
  }
}

const buffer = new StreamBuffer();
buffer.onUpdate((content) => {
  document.getElementById('response').textContent = content;
});

// Then, inside an async streaming loop (stream created as in the earlier examples):
for await (const chunk of stream) {
  if (chunk.choices[0]?.delta?.content) {
    buffer.addChunk(chunk.choices[0].delta.content);
  }
}

3. Handle Finish Reasons

Check why the stream ended:

Python
for chunk in stream:
    choice = chunk.choices[0]
    if choice.delta.content is not None:
        print(choice.delta.content, end="", flush=True)
    
    if choice.finish_reason:
        if choice.finish_reason == "stop":
            print("\n[Completed normally]")
        elif choice.finish_reason == "length":
            print("\n[Reached max tokens]")
        elif choice.finish_reason == "content_filter":
            print("\n[Content filtered]")

Performance Tips

  1. Use appropriate buffer sizes for network efficiency
  2. Implement proper backpressure handling so slow consumers don't exhaust memory
  3. Consider connection pooling for multiple concurrent streams
  4. Monitor token usage in real time (see the sketch below)
  5. Handle network interruptions gracefully
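
For tip 4, OpenAI-compatible endpoints often accept stream_options to append a final usage chunk to the stream; whether HelpingAI honors this option is an assumption worth verifying. A sketch, assuming the same OpenAI SDK client setup as earlier:

Python
stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
    stream_options={"include_usage": True}  # assumption: endpoint supports this OpenAI option
)

for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
    if chunk.usage:  # the final chunk carries usage and an empty choices list
        print(f"\n[prompt: {chunk.usage.prompt_tokens}, "
              f"completion: {chunk.usage.completion_tokens} tokens]")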

Next Steps