Basic Python Tracing Tutorial
Basic Python Tracing Tutorial
This tutorial will teach you the fundamental patterns for implementing LangFuse tracing in your Python applications. You’ll learn by building increasingly complex examples.
Learning Objectives
By the end of this tutorial, you’ll be able to:
- Use decorator-based tracing effectively
- Create manual traces for complex workflows
- Handle errors and debugging scenarios
- Monitor performance and resource usage
- Implement user and session tracking
- Apply advanced tracing features
Prerequisites
Before starting, ensure you have:
- Completed the Quick Start Guide
- Python 3.8+ installed
- LangFuse SDK installed (pip install langfuse)
- Your environment variables configured
Part 1: Master the Decorator Pattern
Let’s start with the most common and easiest tracing approach.
Exercise 1: Simple Function Tracing
from langfuse.decorators import observe
@observe()
def process_document(document: str) -> dict:
    """Analyze a document and return its word count and sentiment.

    Args:
        document: Raw text to analyze.

    Returns:
        A dict with ``word_count`` (number of whitespace-separated tokens),
        ``sentiment`` (the label returned by ``analyze_sentiment``) and
        ``processed`` (always ``True``).
    """
    tokens = document.split()
    # analyze_sentiment is decorated with @observe() as well, so this call
    # happens inside the observation opened for process_document.
    label = analyze_sentiment(document)
    return {"word_count": len(tokens), "sentiment": label, "processed": True}
@observe()
def analyze_sentiment(text: str) -> str:
    """Classify text as positive, negative or neutral by keyword lookup.

    This only simulates sentiment analysis: it looks for a handful of
    keywords as case-insensitive substrings of ``text``.

    Args:
        text: Text to classify.

    Returns:
        ``"positive"`` if the text contains "good" or "great", otherwise
        ``"negative"`` if it contains "bad" or "terrible", otherwise
        ``"neutral"``. Positive keywords are checked first.
    """
    lowered = text.lower()
    if any(keyword in lowered for keyword in ("good", "great")):
        return "positive"
    if any(keyword in lowered for keyword in ("bad", "terrible")):
        return "negative"
    return "neutral"
# Try it out
if __name__ == "__main__":
result = process_document("This is a great example document")
print(f"Result: {result}")
What you learned: The @observe() decorator automatically captures function inputs, outputs, and execution time. Notice how nested functions create a trace hierarchy.
Exercise 2: Add Context and Metadata
@observe(name="document_processor")
def process_document_with_context(document: str, user_id: str) -> dict:
    """Process a document and attach user context to the current trace.

    Args:
        document: Raw text to analyze.
        user_id: Identifier of the user the document belongs to; recorded in
            the trace metadata and echoed in the result.

    Returns:
        A dict with ``word_count``, ``sentiment``, ``user_id`` and
        ``processed`` (always ``True``).
    """
    # Imported locally so the snippet stays self-contained. langfuse_context
    # is the handle the decorator integration provides for enriching the
    # trace that @observe() opened for this call.
    from langfuse.decorators import langfuse_context

    # The decorator captures function parameters automatically
    word_count = len(document.split())
    sentiment = analyze_sentiment(document)

    # Add custom metadata and tags to the trace created by the decorator.
    # A separately constructed Langfuse() client has no notion of "the
    # current trace"; the decorator context does.
    langfuse_context.update_current_trace(
        metadata={
            "user_id": user_id,
            "document_length": len(document),
            "processing_version": "1.0",
        },
        tags=["document", "processing", "v1"],
    )

    return {
        "word_count": word_count,
        "sentiment": sentiment,
        "user_id": user_id,
        "processed": True,
    }
# Try it out
result = process_document_with_context(
"This is a good document to process",
user_id="user_123"
)
What you learned: You can add custom names, metadata, and tags to provide richer context for your traces.
Part 2: Manual Tracing for Complex Workflows
Sometimes you need more control than decorators provide.
Exercise 3: Manual Trace Creation
import os
from langfuse import Langfuse
# Shared client used by the manual-tracing examples below. Credentials and
# host are read from the environment; a variable that is not set is passed
# through as None.
_env = os.environ
langfuse = Langfuse(
    public_key=_env.get("LANGFUSE_PUBLIC_KEY"),
    secret_key=_env.get("LANGFUSE_SECRET_KEY"),
    host=_env.get("LANGFUSE_HOST"),
)
def manual_processing_example():
    """Run a three-step pipeline, recording each step as a manual span.

    The steps (preprocessing, analysis, post-processing) are simulated with
    short sleeps and fixed values; the point is the trace structure: one
    trace with one span per step, each span updated with its output and then
    ended.

    Returns:
        The final result dict with ``sentiment``, ``confidence`` and
        ``processed``. The same dict is recorded as the trace output.
    """
    import time

    raw_text = "Sample document text"

    # Root trace for the whole pipeline
    trace = langfuse.trace(
        name="document_processing_pipeline",
        input={"document": raw_text},
        tags=["manual", "pipeline"],
    )

    # Step 1: Preprocessing
    step = trace.span(name="preprocessing", input={"raw_text": raw_text})
    time.sleep(0.1)  # Simulate preprocessing work
    cleaned_text = "sample document text"
    step.update(output={"cleaned_text": cleaned_text})
    step.end()

    # Step 2: Analysis
    step = trace.span(name="analysis", input={"text": cleaned_text})
    time.sleep(0.2)  # Simulate analysis
    analysis_result = {"sentiment": "neutral", "confidence": 0.8}
    step.update(output=analysis_result)
    step.end()

    # Step 3: Post-processing
    step = trace.span(name="postprocessing", input={"analysis": analysis_result})
    time.sleep(0.1)  # Simulate post-processing
    final_result = {**analysis_result, "processed": True}
    step.update(output=final_result)
    step.end()

    # Record the pipeline result on the root trace
    trace.update(output=final_result)
    return final_result
# Run the example
result = manual_processing_example()
print(f"Manual processing result: {result}")
What you learned: Manual tracing gives you complete control over the trace structure. You can create nested spans to represent different processing steps.
Exercise 4: Batch Processing with Detailed Tracking
def process_batch_of_documents(documents: list, user_id: str) -> dict:
    """Validate and process a batch of documents under a single trace.

    The trace gets a "validation" span and a "processing" span; the
    processing span gets one child span per valid document.

    Args:
        documents: Document strings. Falsy or whitespace-only entries are
            treated as invalid and skipped.
        user_id: Identifier recorded in the trace input.

    Returns:
        A summary dict with ``total_documents``, ``processed_successfully``,
        ``total_words``, ``total_chars`` and ``success_rate`` (0 when
        ``documents`` is empty).
    """
    batch_size = len(documents)
    trace = langfuse.trace(
        name="batch_processing",
        input={"batch_size": batch_size, "user_id": user_id},
        tags=["batch", "processing"],
    )

    # Phase 1: validation - keep only documents with non-blank content
    validation_span = trace.span(name="validation")
    validation_span.update(input={"document_count": batch_size})
    valid_documents = [doc for doc in documents if doc and len(doc.strip()) > 0]
    validation_span.update(
        output={
            "valid_documents": len(valid_documents),
            "invalid_documents": batch_size - len(valid_documents),
        }
    )
    validation_span.end()

    # Phase 2: processing - one child span per valid document
    processing_span = trace.span(name="processing")
    processing_span.update(input={"documents_to_process": len(valid_documents)})
    results = []
    for position, doc in enumerate(valid_documents):
        # Only a 50-character preview of long documents is stored as input
        preview = doc[:50] + "..." if len(doc) > 50 else doc
        doc_span = processing_span.span(
            name=f"process_document_{position}",
            input={"document": preview},
        )
        stats = {
            "word_count": len(doc.split()),
            "char_count": len(doc),
            # Position within the valid documents, not within the raw batch
            "index": position,
        }
        doc_span.update(output=stats)
        doc_span.end()
        results.append(stats)
    processing_span.update(output={"processed_count": len(results)})
    processing_span.end()

    # Phase 3: summary
    summary = {
        "total_documents": batch_size,
        "processed_successfully": len(results),
        "total_words": sum(entry["word_count"] for entry in results),
        "total_chars": sum(entry["char_count"] for entry in results),
        "success_rate": len(results) / batch_size if documents else 0,
    }
    trace.update(output=summary)
    return summary
# Try it out
documents = [
"This is the first document",
"Here's another document to process",
"", # Invalid document
"Final document in the batch"
]
result = process_batch_of_documents(documents, "user_123")
print(f"Batch processing result: {result}")
What you learned: You can create detailed trace hierarchies with nested spans to track complex batch operations.
Part 3: Error Handling and Debugging
Exercise 5: Automatic Error Capture
@observe()
def risky_operation(data: dict):
    """Run an operation that fails unless ``data`` is flagged as safe.

    Args:
        data: Input dict; the operation succeeds only when ``data["safe"]``
            is truthy.

    Returns:
        ``{"status": "success", "data": data}`` on success.

    Raises:
        ValueError: If ``data`` has no truthy ``"safe"`` entry. The error is
            printed and then re-raised unchanged.
    """
    import time

    try:
        is_safe = data.get("safe", False)
        if not is_safe:
            raise ValueError("Unsafe operation detected")
        time.sleep(0.1)  # Simulate processing
        return {"status": "success", "data": data}
    except Exception as exc:
        # Log locally, then let the exception propagate so the caller (and
        # the @observe decorator wrapping this function) still sees it.
        print(f"Operation failed: {exc}")
        raise
# Try both success and failure cases
print("Testing successful operation:")
try:
result = risky_operation({"safe": True, "content": "test"})
print(f"Success: {result}")
except Exception as e:
print(f"Unexpected error: {e}")
print("\nTesting failed operation:")
try:
result = risky_operation({"safe": False, "content": "test"})
print(f"Success: {result}")
except ValueError as e:
print(f"Expected error caught: {e}")
What you learned: The @observe() decorator automatically captures exceptions and error information in your traces.
Exercise 6: Custom Error Context
def process_with_error_context(data: dict):
    """Process ``data`` under a trace that records rich error details.

    Args:
        data: Input dict. ``user_id`` and ``session_id`` (if present) are
            copied into the trace metadata. ``trigger_error`` may be
            ``"validation"``, ``"processing"`` or ``"timeout"`` to simulate
            a failure of that kind.

    Returns:
        ``{"success": True, "processed": True}`` when no error is triggered.

    Raises:
        ValueError: For ``trigger_error == "validation"``.
        RuntimeError: For ``trigger_error == "processing"``.
        TimeoutError: For ``trigger_error == "timeout"``.
    """
    context = {
        "user_id": data.get("user_id"),
        "session_id": data.get("session_id"),
        "version": "1.0.0",
    }
    trace = langfuse.trace(name="process_with_context", input=data, metadata=context)

    try:
        # Simulate different types of errors
        trigger = data.get("trigger_error")
        if trigger == "validation":
            raise ValueError("Invalid input data")
        if trigger == "processing":
            raise RuntimeError("Processing failed")
        if trigger == "timeout":
            raise TimeoutError("Operation timed out")

        outcome = {"success": True, "processed": True}
        trace.update(output=outcome)
        return outcome
    except Exception as exc:
        # Record what went wrong on the trace before propagating the error
        failure = {"error": str(exc), "error_type": type(exc).__name__}
        trace.update(
            output=failure,
            level="ERROR",
            status_message=f"Processing failed: {str(exc)}",
        )
        raise
# Try different error scenarios
test_cases = [
{"user_id": "user_1", "trigger_error": "validation"},
{"user_id": "user_2", "trigger_error": "processing"},
{"user_id": "user_3", "trigger_error": "timeout"},
{"user_id": "user_4"} # Success case
]
for case in test_cases:
try:
result = process_with_error_context(case)
print(f"Success for {case}: {result}")
except Exception as e:
print(f"Error for {case}: {e}")
What you learned: You can add rich error context to your traces to help with debugging and monitoring.
Part 4: Performance Monitoring
Exercise 7: Resource Usage Tracking
import psutil
import time
@observe()
def resource_intensive_operation():
    """Build a list of items and report memory and time actually used.

    Returns:
        A dict with ``items_processed``, a ``memory_usage`` sub-dict
        (``initial_mb``, ``final_mb``, ``delta_mb``, each rounded to two
        decimals) and ``processing_time`` in seconds, measured around the
        work rather than assumed.
    """
    # Get initial resource state (rss is reported in bytes; convert to MB)
    process = psutil.Process()
    initial_memory = process.memory_info().rss / 1024 / 1024  # MB

    started = time.perf_counter()

    # Perform operation
    data = [f"item_{i}" for i in range(50000)]  # Reduced for tutorial

    # Simulate some processing
    time.sleep(0.5)

    # Measure instead of hard-coding the sleep duration, so the reported
    # time also covers building the list.
    elapsed = time.perf_counter() - started

    # Get final resource state
    final_memory = process.memory_info().rss / 1024 / 1024  # MB

    return {
        "items_processed": len(data),
        "memory_usage": {
            "initial_mb": round(initial_memory, 2),
            "final_mb": round(final_memory, 2),
            "delta_mb": round(final_memory - initial_memory, 2),
        },
        "processing_time": round(elapsed, 3),
    }
# Run the resource tracking
result = resource_intensive_operation()
print(f"Resource usage: {result}")
What you learned: You can monitor resource usage alongside your application logic to identify performance bottlenecks.
Part 5: User and Session Context
Exercise 8: Session Tracking
import uuid
class UserSession:
    """Group a user's interactions under one LangFuse session ID.

    Creating an instance opens a ``user_session`` trace; every query
    processed through the instance creates a further trace carrying the same
    session and user IDs.
    """

    def __init__(self, user_id: str, session_id: str = None):
        """Open the session trace.

        Args:
            user_id: Identifier of the user owning the session.
            session_id: Session identifier to reuse. When omitted (or empty),
                a new UUID4 string is generated.
        """
        if not session_id:
            session_id = str(uuid.uuid4())
        self.user_id = user_id
        self.session_id = session_id
        self.trace = langfuse.trace(
            name="user_session",
            session_id=session_id,
            user_id=user_id,
            tags=["session", "user_interaction"],
        )

    @observe()
    def process_query(self, query: str) -> str:
        """Answer ``query`` and record it as a trace in this session.

        Args:
            query: The user's query text.

        Returns:
            The simulated response string.
        """
        # Simulate query processing
        answer = f"Response to: {query}"

        # Same session ID for all traces in this session
        langfuse.trace(
            name="session_query",
            session_id=self.session_id,
            user_id=self.user_id,
            input={"query": query},
            output={"response": answer},
        )
        return answer

    @observe()
    def end_session(self):
        """Mark the session trace as ended.

        Returns:
            ``{"session_ended": True}``.
        """
        outcome = {"session_ended": True}
        self.trace.update(output=outcome)
        # Hand the caller its own dict rather than the one given to the trace
        return dict(outcome)
# Try the session tracking with UUID format
session = UserSession("user_123") # Auto-generates UUID session ID
print(f"Session ID: {session.session_id}") # Will show UUID format
# Or provide your own session ID
custom_session = UserSession("user_456", str(uuid.uuid4()))
print(f"Custom Session ID: {custom_session.session_id}")
What you learned: You can track user sessions and group related traces together for better analysis.
Part 6: Advanced Features
Exercise 9: Conditional Tracing
import os
import random
def conditional_trace(func):
    """Decorate ``func`` so it is traced only in development or staging.

    The ``ENVIRONMENT`` variable is read on every call, so tracing can be
    switched without re-importing the module. When tracing is off, ``func``
    is called directly.

    Args:
        func: The function to wrap.

    Returns:
        A wrapper with the same call signature, name and docstring as
        ``func``.
    """
    import functools

    traced_func = None  # observe()-wrapped version, built on first traced call

    @functools.wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrapper(*args, **kwargs):
        nonlocal traced_func
        # Only trace in development or staging
        if os.getenv("ENVIRONMENT") in ("development", "staging"):
            if traced_func is None:
                # Wrap once and reuse, instead of re-decorating on each call
                traced_func = observe()(func)
            return traced_func(*args, **kwargs)
        return func(*args, **kwargs)

    return wrapper
@conditional_trace
def debug_function():
    """Return a debug payload naming the current environment.

    Returns:
        ``{"debug": True, "environment": <ENVIRONMENT or "unknown">}``.
    """
    environment = os.getenv("ENVIRONMENT", "unknown")
    return {"debug": True, "environment": environment}
# Sampling-based tracing
# Sampling is a client-level setting rather than an argument of observe(),
# so it is configured once on the decorator context. NOTE: this applies to
# every trace created through the decorators in this process, not only to
# the function below.
from langfuse.decorators import langfuse_context

langfuse_context.configure(sample_rate=0.5)  # Only trace 50% of calls


@observe()
def high_frequency_function():
    """Return a timestamped payload; stands in for a very frequently called function.

    Returns:
        ``{"processed": True, "timestamp": <current time.time()>}``.
    """
    return {"processed": True, "timestamp": time.time()}
# Test conditional tracing
print("Testing conditional tracing:")
result = debug_function()
print(f"Debug function result: {result}")
print("\nTesting sampling (run multiple times to see sampling effect):")
for i in range(5):
result = high_frequency_function()
print(f"Call {i+1}: {result}")
What you learned: You can control when tracing happens based on environment variables or sampling rates.
Summary
Congratulations! You’ve learned the essential patterns for Python tracing:
- ✅ Decorator-based tracing - The easiest way to add tracing
- ✅ Manual tracing - For complex workflows requiring precise control
- ✅ Error handling - Automatic and custom error capture
- ✅ Performance monitoring - Resource usage tracking
- ✅ Session management - User and session context
- ✅ Advanced features - Conditional tracing and sampling
Practice Exercises
Try these exercises to reinforce your learning:
- Create a text processing pipeline that uses both decorators and manual tracing
- Add error handling to a function that processes user input
- Implement session tracking for a simple chatbot
- Monitor resource usage in a data processing function
- Set up conditional tracing based on user roles
What’s Next?
Now that you understand the fundamentals, explore these guides:
- Python SDK - Learn the full SDK capabilities
- Raw Requests - Use HTTP requests directly
- OpenTelemetry - Implement standardized tracing
- Azure Deployment - Deploy your own LangFuse instance
🚀 You’re ready to implement comprehensive tracing in your applications!