Streaming Guide

Streaming lets you receive responses from HelpingAI in real time as they're generated, rather than waiting for the complete response. This makes the experience more interactive and responsive, especially for longer outputs.

How Streaming Works

When you set stream: true in your request, HelpingAI responds with Server-Sent Events (SSE). Each event is a line beginning with data: followed by a JSON chunk containing a piece of the response as it's being generated, and the stream ends with the sentinel data: [DONE].

Basic Streaming

Python (using requests)

Python
import requests
import json

url = "https://api.helpingai.co/v1/chat/completions"
headers = {
    "Authorization": "Bearer YOUR_API_KEY",
    "Content-Type": "application/json"
}
data = {
    "model": "Dhanishtha-2.0-preview",
    "messages": [
        {"role": "user", "content": "Tell me a story about a brave knight"}
    ],
    "stream": True,
    "temperature": 0.8,
    "max_tokens": 500
}

response = requests.post(url, headers=headers, json=data, stream=True)
response.raise_for_status()  # Fail fast on HTTP errors before reading the stream

for line in response.iter_lines():
    if line:
        line = line.decode('utf-8')
        if line.startswith('data: '):
            data_str = line[6:]  # Remove 'data: ' prefix
            if data_str == '[DONE]':
                break
            try:
                chunk = json.loads(data_str)
                if chunk['choices'][0]['delta'].get('content'):
                    print(chunk['choices'][0]['delta']['content'], end='', flush=True)
            except json.JSONDecodeError:
                continue

Python (using OpenAI SDK)

Python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "user", "content": "Tell me a story about a brave knight"}
    ],
    stream=True,
    temperature=0.8,
    max_tokens=500
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)

Python (using HelpingAI SDK)

Python
from helpingai import HelpingAI

client = HelpingAI(api_key="YOUR_API_KEY")

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "user", "content": "Tell me a story about a brave knight"}
    ],
    stream=True,
    temperature=0.8,
    max_tokens=500
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)

JavaScript (using axios)

JavaScript
const axios = require('axios');

async function streamResponse() {
  const response = await axios.post(
    'https://api.helpingai.co/v1/chat/completions',
    {
      model: 'Dhanishtha-2.0-preview',
      messages: [
        {role: 'user', content: 'Tell me a story about a brave knight'}
      ],
      stream: true,
      temperature: 0.8,
      max_tokens: 500
    },
    {
      headers: {
        'Authorization': 'Bearer YOUR_API_KEY',
        'Content-Type': 'application/json'
      },
      responseType: 'stream'
    }
  );

  let buffered = '';
  response.data.on('data', (chunk) => {
    // SSE lines can be split across network chunks, so buffer incomplete lines
    buffered += chunk.toString();
    const lines = buffered.split('\n');
    buffered = lines.pop(); // keep the trailing partial line for the next chunk

    for (const line of lines) {
      if (line.startsWith('data: ')) {
        const data = line.slice(6);
        if (data === '[DONE]') {
          return;
        }

        try {
          const parsed = JSON.parse(data);
          if (parsed.choices[0].delta.content) {
            process.stdout.write(parsed.choices[0].delta.content);
          }
        } catch (error) {
          // Skip lines that are not valid JSON
        }
      }
    }
  });
}

streamResponse();

JavaScript (using OpenAI package)

JavaScript
import OpenAI from 'openai';

const openai = new OpenAI({
  baseURL: 'https://api.helpingai.co/v1',
  apiKey: 'YOUR_API_KEY'
});

async function main() {
  const stream = await openai.chat.completions.create({
    model: 'Dhanishtha-2.0-preview',
    messages: [
      {role: 'user', content: 'Tell me a story about a brave knight'}
    ],
    stream: true,
    temperature: 0.8,
    max_tokens: 500
  });

  for await (const chunk of stream) {
    if (chunk.choices[0]?.delta?.content) {
      process.stdout.write(chunk.choices[0].delta.content);
    }
  }
}

main();

JavaScript (using HelpingAI SDK)

JavaScript
import { HelpingAI } from 'helpingai';

const client = new HelpingAI({
  apiKey: 'YOUR_API_KEY'
});

async function main() {
  const stream = await client.chat.completions.create({
    model: 'Dhanishtha-2.0-preview',
    messages: [
      {role: 'user', content: 'Tell me a story about a brave knight'}
    ],
    stream: true,
    temperature: 0.8,
    max_tokens: 500
  });

  for await (const chunk of stream) {
    if (chunk.choices[0]?.delta?.content) {
      process.stdout.write(chunk.choices[0].delta.content);
    }
  }
}

main();

Stream Response Format

Each streaming chunk follows this format:

Text
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{"content":"Once"},"finish_reason":null}]}

data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{"content":" upon"},"finish_reason":null}]}

data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{"content":" a"},"finish_reason":null}]}

data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}

data: [DONE]

Chunk Structure

Field     Type      Description
id        string    Unique identifier for the completion
object    string    Always "chat.completion.chunk"
created   integer   Unix timestamp
model     string    Model used
choices   array     Array of choice objects

Choice Object (Streaming)

Field          Type     Description
index          integer  Choice index
delta          object   Content delta for this chunk
finish_reason  string   Reason for completion (null until the final chunk)

Delta Object

Field    Type    Description
content  string  Partial content for this chunk
role     string  Role (only in the first chunk)
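
Because each delta carries only a fragment, clients typically accumulate the deltas to rebuild the complete assistant message. A minimal sketch, assuming a stream created with the OpenAI SDK as in the examples above:

Python
message = {"role": None, "content": ""}

for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.role:        # the role arrives only in the first chunk
        message["role"] = delta.role
    if delta.content:     # subsequent chunks carry content fragments
        message["content"] += delta.content

print(message)  # e.g. {'role': 'assistant', 'content': 'Once upon a ...'}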

Advanced Streaming Examples

With Emotional Context

Python (using OpenAI SDK)

Python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "system", "content": "You are a compassionate counselor."},
        {"role": "user", "content": "I'm feeling really anxious about my job interview tomorrow."}
    ],
    stream=True,
    temperature=0.7,
    max_tokens=300
)

print("AI Response: ", end="")
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()  # New line at the end

With Chain of Recursive Thoughts

Python (using OpenAI SDK)

Python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "user", "content": "Solve this step by step: If a train travels 120 miles in 2 hours, what's its speed?"}
    ],
    stream=True,
    extra_body={"hideThink": False},  # Show reasoning; the OpenAI SDK rejects unknown keyword arguments, so pass extra params via extra_body
    temperature=0.3,
    max_tokens=400
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
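
With the reasoning visible, the thought process streams interleaved with the final answer. If you only want the answer, one option is to buffer the stream and strip the reasoning afterward; filtering on the fly is harder because a tag can be split across chunk boundaries. A minimal sketch, assuming the reasoning is wrapped in <think>...</think> tags (verify the exact delimiters your model emits):

Python
import re

full_text = ""
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        full_text += chunk.choices[0].delta.content

# Assumed delimiters: remove <think>...</think> blocks after the stream ends
answer = re.sub(r"<think>.*?</think>", "", full_text, flags=re.DOTALL).strip()
print(answer)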

Error Handling in Streaming

Python Example

Python
from openai import OpenAI
import json

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

try:
    stream = client.chat.completions.create(
        model="Dhanishtha-2.0-preview",
        messages=[
            {"role": "user", "content": "Hello!"}
        ],
        stream=True
    )
    
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="", flush=True)
            
except Exception as e:
    print(f"Streaming error: {e}")
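
For finer-grained handling, the OpenAI Python SDK (v1+) raises typed exceptions you can catch separately. A sketch, assuming the same client setup as above:

Python
import openai

try:
    stream = client.chat.completions.create(
        model="Dhanishtha-2.0-preview",
        messages=[{"role": "user", "content": "Hello!"}],
        stream=True
    )
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="", flush=True)
except openai.APIConnectionError as e:
    print(f"Network problem: {e}")           # connection dropped or DNS failure
except openai.RateLimitError as e:
    print(f"Rate limited, retry later: {e}")
except openai.APIStatusError as e:
    print(f"API returned {e.status_code}")   # any other non-2xx response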

JavaScript Example

JavaScript
import OpenAI from 'openai';

const openai = new OpenAI({
  baseURL: 'https://api.helpingai.co/v1',
  apiKey: 'YOUR_API_KEY'
});

async function streamWithErrorHandling() {
  try {
    const stream = await openai.chat.completions.create({
      model: 'Dhanishtha-2.0-preview',
      messages: [
        {role: 'user', content: 'Hello!'}
      ],
      stream: true
    });

    for await (const chunk of stream) {
      if (chunk.choices[0]?.delta?.content) {
        process.stdout.write(chunk.choices[0].delta.content);
      }
    }
  } catch (error) {
    console.error('Streaming error:', error);
  }
}

streamWithErrorHandling();

Best Practices

1. Handle Connection Issues

Always implement retry logic for transient network failures. Note that a retry restarts the stream from the beginning, so be prepared to deduplicate output you have already shown:

Python
import time
from openai import OpenAI

def stream_with_retry(client, messages, max_retries=3):
    for attempt in range(max_retries):
        try:
            stream = client.chat.completions.create(
                model="Dhanishtha-2.0-preview",
                messages=messages,
                stream=True
            )
            
            for chunk in stream:
                if chunk.choices[0].delta.content is not None:
                    print(chunk.choices[0].delta.content, end="", flush=True)
            break
            
        except Exception:
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, 4s...
                continue
            raise  # Re-raise with the original traceback after the last attempt

2. Buffer Partial Responses

For UI applications, consider buffering chunks:

JavaScript
class StreamBuffer {
  constructor() {
    this.buffer = '';
    this.callbacks = [];
  }
  
  addChunk(content) {
    this.buffer += content;
    this.callbacks.forEach(callback => callback(this.buffer));
  }
  
  onUpdate(callback) {
    this.callbacks.push(callback);
  }
}

const buffer = new StreamBuffer();
buffer.onUpdate((content) => {
  document.getElementById('response').textContent = content;
});

// Then, inside an async streaming loop (stream created as in the earlier examples):
for await (const chunk of stream) {
  if (chunk.choices[0]?.delta?.content) {
    buffer.addChunk(chunk.choices[0].delta.content);
  }
}

3. Handle Finish Reasons

Check why the stream ended:

Python
for chunk in stream:
    choice = chunk.choices[0]
    if choice.delta.content is not None:
        print(choice.delta.content, end="", flush=True)
    
    if choice.finish_reason:
        if choice.finish_reason == "stop":
            print("\n[Completed normally]")
        elif choice.finish_reason == "length":
            print("\n[Reached max tokens]")
        elif choice.finish_reason == "content_filter":
            print("\n[Content filtered]")

Performance Tips

  1. Use appropriate buffer sizes for network efficiency
  2. Implement proper backpressure handling so slow consumers don't exhaust memory
  3. Consider connection pooling for multiple concurrent streams
  4. Monitor token usage in real time (see the sketch below)
  5. Handle network interruptions gracefully
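
For tip 4, OpenAI-compatible endpoints often accept stream_options to append a final usage chunk to the stream; whether HelpingAI honors this option is an assumption worth verifying. A sketch, assuming the same OpenAI SDK client setup as earlier:

Python
stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
    stream_options={"include_usage": True}  # assumption: endpoint supports this OpenAI option
)

for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
    if chunk.usage:  # the final chunk carries usage and an empty choices list
        print(f"\n[prompt: {chunk.usage.prompt_tokens}, "
              f"completion: {chunk.usage.completion_tokens} tokens]")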

Next Steps