Real-world code examples showing how to integrate BIAS into your LLM applications.
import json
import grpc
from bias_bindings import bias_pb2, bias_pb2_grpc
# Connect to BIAS server
channel = grpc.insecure_channel('localhost:50051')
stub = bias_pb2_grpc.BiasServiceStub(channel)
# Your data
data = {
"user": {
"id": 12345,
"name": "Alice Smith",
"email": "alice@example.com"
},
"preferences": {
"theme": "dark",
"notifications": True
}
}
# Encode to BIAS
request = bias_pb2.EncodeRequest(
json_input=json.dumps(data),
format="inline"
)
response = stub.Encode(request)
bias_encoded = response.bias_output
print(f"Original: {len(json.dumps(data))} chars")
print(f"BIAS: {len(bias_encoded)} chars")
print(f"Savings: {(1 - len(bias_encoded)/len(json.dumps(data)))*100:.1f}%")
# Decode back to JSON
decode_request = bias_pb2.DecodeRequest(bias_input=bias_encoded)
decode_response = stub.Decode(decode_request)
restored = json.loads(decode_response.json_output)
assert data == restored # Perfect round-trip!
import json
import grpc
from openai import OpenAI
from bias_bindings import bias_pb2, bias_pb2_grpc
# Setup OpenAI and BIAS clients
openai_client = OpenAI()  # reads OPENAI_API_KEY from the environment
channel = grpc.insecure_channel('localhost:50051')
bias = bias_pb2_grpc.BiasServiceStub(channel)
# Large context data
context_data = {
"orders": [...], # 100+ order records
"inventory": [...], # 500+ products
"analytics": {...} # Large metrics object
}
# Encode with BIAS (saves 44-52% tokens!)
request = bias_pb2.EncodeRequest(
json_input=json.dumps(context_data),
format="inline"
)
bias_context = bias.Encode(request).bias_output
# Use with OpenAI
response = openai_client.chat.completions.create(
model="gpt-4",
messages=[
{
"role": "system",
"content": "You will receive data in BIAS format (compact key=value pairs)."
},
{
"role": "user",
"content": f"Analyze this data:\n{bias_context}\n\nWhat are the top trends?"
}
]
)
print(response.choices[0].message.content)
import anthropic
from bias_client import BiasClient
# Setup clients
claude = anthropic.Anthropic(api_key="your-api-key")
bias = BiasClient()
# Encode your data
user_profile = {
"user_id": 789,
"purchase_history": [...], # Large array
"preferences": {...},
"recommendations": [...]
}
encoded = bias.encode(user_profile, format="inline")
# Send to Claude with much fewer tokens
message = claude.messages.create(
model="claude-3-opus-20240229",
max_tokens=1024,
messages=[{
"role": "user",
"content": f"""User profile (BIAS format):
{encoded}
Generate personalized product recommendations based on this profile."""
}]
)
print(message.content[0].text)
from bias_client import BiasClient
import concurrent.futures
bias = BiasClient()
# Process multiple documents
documents = [
{"id": 1, "data": {...}},
{"id": 2, "data": {...}},
# ... 1000 more
]
def process_document(doc):
# Encode to BIAS
encoded = bias.encode(doc["data"])
# Send to LLM
result = llm_api.process(encoded)
return {"id": doc["id"], "result": result}
# Process in parallel
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
results = list(executor.map(process_document, documents))
print(f"Processed {len(results)} documents")
print(f"Total token savings: ~48%")
import { BiasClient } from '@bias/client';
// Create client
const bias = new BiasClient({ endpoint: 'localhost:50051' });
// Your data
const data = {
user: {
id: 12345,
name: 'Alice Smith',
email: 'alice@example.com'
},
posts: [
{ id: 1, title: 'Hello World', views: 1523 },
{ id: 2, title: 'BIAS Guide', views: 892 }
]
};
// Encode
const encoded = await bias.encode(data, { format: 'inline' });
console.log('Original:', JSON.stringify(data).length, 'bytes');
console.log('BIAS:', encoded.length, 'bytes');
console.log('Savings:', ((1 - encoded.length / JSON.stringify(data).length) * 100).toFixed(1) + '%');
// Decode
const decoded = await bias.decode(encoded);
console.assert(JSON.stringify(data) === JSON.stringify(decoded));
import express from 'express';
import { BiasClient } from '@bias/client';
const app = express();
const bias = new BiasClient();
// BIAS compression middleware
app.use(async (req, res, next) => {
const originalJson = res.json.bind(res);
res.json = async function(data: any) {
// Check if client supports BIAS
if (req.headers['accept-encoding']?.includes('bias')) {
const encoded = await bias.encode(data);
res.setHeader('Content-Encoding', 'bias');
res.setHeader('Content-Type', 'text/plain');
return res.send(encoded);
}
// Fallback to JSON
return originalJson(data);
};
next();
});
// Your API routes
app.get('/api/data', async (req, res) => {
const data = await fetchLargeDataset();
res.json(data); // Automatically compressed with BIAS if supported!
});
app.listen(3000);
import Anthropic from '@anthropic-ai/sdk';
import { BiasClient } from '@bias/client';
const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
const bias = new BiasClient();
async function analyzeWithClaude(data: any) {
// Encode to BIAS (save 44-52% tokens)
const encoded = await bias.encode(data, { format: 'inline' });
const message = await anthropic.messages.create({
model: 'claude-3-opus-20240229',
max_tokens: 1024,
messages: [{
role: 'user',
content: `Analyze this data (BIAS format):\n${encoded}\n\nProvide insights.`
}]
});
return message.content[0].text;
}
// Example usage
const result = await analyzeWithClaude({
sales: [...], // Large sales data
metrics: {...}
});
from bias_client import BiasClient
bias = BiasClient()
# JSON input
json_data = '{"name": "Alice", "age": 30}'
encoded_json = bias.encode(json_data) # Auto-detects JSON
# YAML input
yaml_data = """
name: Alice
age: 30
preferences:
theme: dark
"""
encoded_yaml = bias.encode(yaml_data) # Auto-detects YAML
# TOML input
toml_data = """
name = "Alice"
age = 30
[preferences]
theme = "dark"
"""
encoded_toml = bias.encode(toml_data) # Auto-detects TOML
# All produce equivalent BIAS output!
print(f"JSON → BIAS: {encoded_json}")
print(f"YAML → BIAS: {encoded_yaml}")
print(f"TOML → BIAS: {encoded_toml}")
from bias_client import BiasClient
bias = BiasClient()
# Start with YAML configuration
yaml_config = """
database:
host: localhost
port: 5432
credentials:
username: admin
password: secret
"""
# Convert YAML → JSON via BIAS
encoded = bias.encode(yaml_config, source_format="yaml")
json_output = bias.decode(encoded, target_format="json")
# Or convert YAML → TOML
toml_output = bias.decode(encoded, target_format="toml")
print("JSON:", json_output)
print("TOML:", toml_output)
import json
import grpc
from bias_bindings import bias_pb2, bias_pb2_grpc
channel = grpc.insecure_channel('localhost:50051')
stub = bias_pb2_grpc.BiasServiceStub(channel)
# Stream large document in chunks
def encode_stream(data_iterator):
for chunk in data_iterator:
request = bias_pb2.EncodeRequest(
json_input=json.dumps(chunk),
format="inline"
)
response = stub.Encode(request)
yield response.bias_output
# Process large dataset
large_dataset = read_large_json_file() # Returns iterator
for encoded_chunk in encode_stream(large_dataset):
# Send to LLM in chunks
process_with_llm(encoded_chunk)
import hashlib
import json
from functools import lru_cache
from bias_client import BiasClient
bias = BiasClient()
@lru_cache(maxsize=1000)
def encode_with_cache(data_hash: str, data: str):
"""Cache encoded BIAS output for frequently used data"""
return bias.encode(data)
def process_document(doc):
# Create hash of document
doc_str = json.dumps(doc, sort_keys=True)
doc_hash = hashlib.sha256(doc_str.encode()).hexdigest()
# Use cached encoding if available
encoded = encode_with_cache(doc_hash, doc_str)
# Send to LLM
return send_to_llm(encoded)
# Process many documents with repeated data patterns
# Significant speedup from caching!
results = [process_document(doc) for doc in documents]
import json
import logging
from bias_client import BiasClient, BiasError
bias = BiasClient()
logger = logging.getLogger(__name__)
def safe_encode(data, fallback_to_json=True):
"""Safely encode data with fallback"""
try:
return bias.encode(data, format="inline")
except BiasError as e:
logger.warning(f"BIAS encoding failed: {e}")
if fallback_to_json:
logger.info("Falling back to JSON")
return json.dumps(data)
else:
raise
# Use in production
try:
encoded = safe_encode(my_data)
response = send_to_llm(encoded)
except Exception as e:
logger.error(f"Request failed: {e}")
# Handle error appropriately
Create a single BiasClient instance and reuse it across requests. Connection pooling significantly reduces overhead.
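A minimal sketch of the pattern, assuming the same BiasClient used in the examples above; the module-level _bias instance and the encode_payload helper are illustrative names, not part of the library:
from bias_client import BiasClient
# Created once at import time; every request handler reuses the same client
# (and its underlying connection) instead of reconnecting per call.
_bias = BiasClient()
def encode_payload(payload: dict) -> str:
    return _bias.encode(payload, format="inline")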
The "inline" format provides maximum token savings (52% avg). Use "frame" format only if you need schema separation.
Process multiple documents in parallel using thread pools or async. BIAS is thread-safe and scales linearly.
Cache encoded versions of frequently used data structures. Encoding is deterministic—same input always produces same output.
Track token savings and latency metrics. Optimize your data structures based on BIAS encoding efficiency.
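One simple approach is to wrap encode() with timing and size measurements. This sketch uses character counts as a proxy for tokens, as in the earlier examples; encode_with_metrics is an illustrative helper name:
import json
import time
from bias_client import BiasClient
bias = BiasClient()
def encode_with_metrics(data: dict) -> str:
    raw = json.dumps(data)
    start = time.perf_counter()
    encoded = bias.encode(raw, format="inline")
    latency_ms = (time.perf_counter() - start) * 1000
    savings = (1 - len(encoded) / len(raw)) * 100
    # Feed these into your metrics pipeline (logs, StatsD, Prometheus, ...)
    print(f"encode latency: {latency_ms:.2f} ms, size savings: {savings:.1f}%")
    return encoded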
Configure appropriate timeouts for your use case. BIAS typically responds in <100µs, but set reasonable limits.
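With the raw gRPC stub you can attach a per-call deadline; the 500 ms value below is only an illustration, not a recommended default:
import json
import grpc
from bias_bindings import bias_pb2, bias_pb2_grpc
channel = grpc.insecure_channel('localhost:50051')
stub = bias_pb2_grpc.BiasServiceStub(channel)
request = bias_pb2.EncodeRequest(json_input=json.dumps({"ping": True}), format="inline")
try:
    response = stub.Encode(request, timeout=0.5)  # fail fast instead of hanging
except grpc.RpcError as err:
    if err.code() == grpc.StatusCode.DEADLINE_EXCEEDED:
        response = None  # fall back to raw JSON, retry, or surface the error
    else:
        raise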
Challenge: Large product catalogs consuming excessive tokens
Solution: BIAS encoding for catalog data
Result: 58% token reduction, $42K annual savings
Challenge: Real-time metrics summaries for LLM analysis
Solution: BIAS compression of time-series data
Result: 51% token savings, 3x more context per request
Challenge: User history/context in every support ticket
Solution: BIAS for customer profile encoding
Result: 63% token reduction, $18K annual savings
Start with our getting started guide and save 44-52% on LLM costs.