Basic Python Tracing Tutorial
Basic Python Tracing Tutorial
This tutorial will teach you the fundamental patterns for implementing LangFuse tracing in your Python applications. You’ll learn by building increasingly complex examples.
Learning Objectives
By the end of this tutorial, you’ll be able to:
- Use decorator-based tracing effectively
- Create manual traces for complex workflows
- Handle errors and debugging scenarios
- Monitor performance and resource usage
- Implement user and session tracking
- Apply advanced tracing features
Prerequisites
Before starting, ensure you have:
- Completed the Quick Start Guide
- Python 3.8+ installed
- LangFuse SDK installed (`pip install langfuse`)
- Your environment variables configured
Part 1: Master the Decorator Pattern
Let’s start with the most common and easiest tracing approach.
Exercise 1: Simple Function Tracing
from langfuse.decorators import observe
@observe()
def process_document(document: str) -> dict:
    """Process a document and return analysis results.

    Args:
        document: Raw text to analyze.

    Returns:
        A dict with ``word_count`` (whitespace-separated tokens),
        ``sentiment`` (label returned by ``analyze_sentiment``) and
        ``processed`` (always ``True``).
    """
    word_count = len(document.split())
    # analyze_sentiment is itself decorated with @observe(), so this call
    # shows up as a nested observation under this function's trace.
    sentiment = analyze_sentiment(document)

    return {
        "word_count": word_count,
        "sentiment": sentiment,
        "processed": True,
    }
@observe()
def analyze_sentiment(text: str) -> str:
    """Analyze sentiment of the given text.

    This is a keyword-based stand-in for a real sentiment model: matching is
    case-insensitive and substring-based, and positive keywords take
    precedence over negative ones.

    Args:
        text: Text to classify.

    Returns:
        ``"positive"``, ``"negative"`` or ``"neutral"``.
    """
    # Simulate sentiment analysis; lowercase once instead of per comparison.
    lowered = text.lower()
    if any(keyword in lowered for keyword in ("good", "great")):
        return "positive"
    if any(keyword in lowered for keyword in ("bad", "terrible")):
        return "negative"
    return "neutral"
# Try it out
if __name__ == "__main__":
    result = process_document("This is a great example document")
    print(f"Result: {result}")
What you learned: The `@observe()` decorator automatically captures function inputs, outputs, and execution time. Notice how nested functions create a trace hierarchy.
Exercise 2: Add Context and Metadata
@observe(name="document_processor")
def process_document_with_context(document: str, user_id: str) -> dict:
    """Process a document and attach user context to the active trace.

    Args:
        document: Raw text to analyze.
        user_id: Identifier of the requesting user; stored in the trace
            metadata and echoed back in the result.

    Returns:
        A dict with ``word_count``, ``sentiment``, ``user_id`` and
        ``processed`` (always ``True``).
    """
    # Imported locally so the snippet stays runnable on its own.
    from langfuse.decorators import langfuse_context

    # The decorator captures function parameters automatically
    word_count = len(document.split())
    sentiment = analyze_sentiment(document)

    # Add custom metadata and tags to the trace opened by @observe().
    # The decorator context owns the active trace, so it is updated through
    # langfuse_context rather than through a freshly constructed client.
    langfuse_context.update_current_trace(
        metadata={
            "user_id": user_id,
            "document_length": len(document),
            "processing_version": "1.0",
        },
        tags=["document", "processing", "v1"],
    )

    return {
        "word_count": word_count,
        "sentiment": sentiment,
        "user_id": user_id,
        "processed": True,
    }
# Try it out
result = process_document_with_context(
    "This is a good document to process",
    user_id="user_123",
)
What you learned: You can add custom names, metadata, and tags to provide richer context for your traces.
Part 2: Manual Tracing for Complex Workflows
Sometimes you need more control than decorators provide.
Exercise 3: Manual Trace Creation
import os
from langfuse import Langfuse
# Shared client used by the manual-tracing exercises below. Credentials and
# host are read from the environment so that no secrets live in the source.
langfuse = Langfuse(
    public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
    host=os.getenv("LANGFUSE_HOST"),
)
def manual_processing_example():
    """Example of manual trace creation for complex workflows.

    Builds one trace with three sequential spans (preprocessing, analysis,
    post-processing). The work in each step is simulated with ``time.sleep``.

    Returns:
        A dict with ``sentiment``, ``confidence`` and ``processed``.
    """
    import time

    # Create a main trace
    trace = langfuse.trace(
        name="document_processing_pipeline",
        input={"document": "Sample document text"},
        tags=["manual", "pipeline"],
    )

    # Step 1: Preprocessing
    preprocessing_span = trace.span(
        name="preprocessing",
        input={"raw_text": "Sample document text"},
    )

    # Simulate preprocessing work
    time.sleep(0.1)
    cleaned_text = "sample document text"

    preprocessing_span.update(output={"cleaned_text": cleaned_text})
    preprocessing_span.end()

    # Step 2: Analysis
    analysis_span = trace.span(
        name="analysis",
        input={"text": cleaned_text},
    )

    # Simulate analysis
    time.sleep(0.2)
    analysis_result = {"sentiment": "neutral", "confidence": 0.8}

    analysis_span.update(output=analysis_result)
    analysis_span.end()

    # Step 3: Post-processing
    postprocessing_span = trace.span(
        name="postprocessing",
        input={"analysis": analysis_result},
    )

    # Simulate post-processing
    time.sleep(0.1)
    final_result = {
        "sentiment": analysis_result["sentiment"],
        "confidence": analysis_result["confidence"],
        "processed": True,
    }

    postprocessing_span.update(output=final_result)
    postprocessing_span.end()

    # Update the main trace
    trace.update(output=final_result)

    return final_result
# Run the example
result = manual_processing_example()
print(f"Manual processing result: {result}")
What you learned: Manual tracing gives you complete control over the trace structure. You can create nested spans to represent different processing steps.
Exercise 4: Batch Processing with Detailed Tracking
def process_batch_of_documents(documents: list, user_id: str) -> dict:
    """Process multiple documents with detailed tracing.

    One trace is created for the batch, with a ``validation`` span, a
    ``processing`` span, and one child span per valid document.

    Args:
        documents: Document strings; empty or whitespace-only entries are
            counted as invalid and skipped.
        user_id: Identifier recorded in the trace input.

    Returns:
        A summary dict with ``total_documents``, ``processed_successfully``,
        ``total_words``, ``total_chars`` and ``success_rate`` (0 for an
        empty batch).
    """
    # Create main trace
    trace = langfuse.trace(
        name="batch_processing",
        input={"batch_size": len(documents), "user_id": user_id},
        tags=["batch", "processing"],
    )

    # Track validation
    validation_span = trace.span(name="validation")
    validation_span.update(input={"document_count": len(documents)})

    valid_documents = [doc for doc in documents if doc and doc.strip()]

    validation_span.update(output={
        "valid_documents": len(valid_documents),
        "invalid_documents": len(documents) - len(valid_documents),
    })
    validation_span.end()

    # Track processing
    processing_span = trace.span(name="processing")
    processing_span.update(input={"documents_to_process": len(valid_documents)})

    results = []
    for i, doc in enumerate(valid_documents):
        # Only a 50-character preview is recorded as the span input.
        preview = (doc[:50] + "...") if len(doc) > 50 else doc

        # Create a span for each document
        doc_span = processing_span.span(
            name=f"process_document_{i}",
            input={"document": preview},
        )

        # Process the document
        result = {
            "word_count": len(doc.split()),
            "char_count": len(doc),
            "index": i,
        }

        doc_span.update(output=result)
        doc_span.end()
        results.append(result)

    processing_span.update(output={"processed_count": len(results)})
    processing_span.end()

    # Calculate summary
    total_words = sum(r["word_count"] for r in results)
    total_chars = sum(r["char_count"] for r in results)

    summary = {
        "total_documents": len(documents),
        "processed_successfully": len(results),
        "total_words": total_words,
        "total_chars": total_chars,
        # Guard against division by zero for an empty batch.
        "success_rate": len(results) / len(documents) if documents else 0,
    }

    trace.update(output=summary)
    return summary
# Try it out
documents = [
    "This is the first document",
    "Here's another document to process",
    "",  # Invalid document
    "Final document in the batch",
]
result = process_batch_of_documents(documents, "user_123")
print(f"Batch processing result: {result}")
What you learned: You can create detailed trace hierarchies with nested spans to track complex batch operations.
Part 3: Error Handling and Debugging
Exercise 5: Automatic Error Capture
@observe()
def risky_operation(data: dict):
    """Operation that might fail - errors are automatically captured.

    Args:
        data: Input payload; it must contain a truthy ``"safe"`` entry for
            the operation to proceed.

    Returns:
        ``{"status": "success", "data": data}`` on success.

    Raises:
        ValueError: If ``data["safe"]`` is missing or falsy.
    """
    try:
        if not data.get("safe", False):
            raise ValueError("Unsafe operation detected")

        # Simulate processing
        import time
        time.sleep(0.1)

        result = {"status": "success", "data": data}
        return result
    except Exception as e:
        # The @observe decorator automatically captures exceptions
        print(f"Operation failed: {e}")
        raise  # Re-raise the exception
# Try both success and failure cases
print("Testing successful operation:")
try:
    result = risky_operation({"safe": True, "content": "test"})
    print(f"Success: {result}")
except Exception as e:
    print(f"Unexpected error: {e}")

print("\nTesting failed operation:")
try:
    result = risky_operation({"safe": False, "content": "test"})
    print(f"Success: {result}")
except ValueError as e:
    print(f"Expected error caught: {e}")
What you learned: The `@observe()` decorator automatically captures exceptions and error information in your traces.
Exercise 6: Custom Error Context
def process_with_error_context(data: dict):
    """Process data with rich error context.

    Args:
        data: Input payload. ``user_id`` and ``session_id`` are copied into
            the trace metadata when present. ``trigger_error`` may be
            ``"validation"``, ``"processing"`` or ``"timeout"`` to simulate
            the corresponding failure.

    Returns:
        ``{"success": True, "processed": True}`` when no error is triggered.

    Raises:
        ValueError: When ``trigger_error == "validation"``.
        RuntimeError: When ``trigger_error == "processing"``.
        TimeoutError: When ``trigger_error == "timeout"``.
    """
    trace = langfuse.trace(
        name="process_with_context",
        input=data,
        metadata={
            "user_id": data.get("user_id"),
            "session_id": data.get("session_id"),
            "version": "1.0.0",
        },
    )

    try:
        # Simulate different types of errors
        if data.get("trigger_error") == "validation":
            raise ValueError("Invalid input data")
        elif data.get("trigger_error") == "processing":
            raise RuntimeError("Processing failed")
        elif data.get("trigger_error") == "timeout":
            raise TimeoutError("Operation timed out")

        result = {"success": True, "processed": True}
        trace.update(output=result)
        return result
    except Exception as e:
        # Record the failure on the trace before propagating it.
        # NOTE(review): confirm that trace.update() accepts `level` and
        # `status_message` in the installed SDK version; these may only be
        # supported on spans/observations.
        trace.update(
            output={"error": str(e), "error_type": type(e).__name__},
            level="ERROR",
            status_message=f"Processing failed: {str(e)}",
        )
        raise
# Try different error scenarios
test_cases = [
    {"user_id": "user_1", "trigger_error": "validation"},
    {"user_id": "user_2", "trigger_error": "processing"},
    {"user_id": "user_3", "trigger_error": "timeout"},
    {"user_id": "user_4"},  # Success case
]

for case in test_cases:
    try:
        result = process_with_error_context(case)
        print(f"Success for {case}: {result}")
    except Exception as e:
        print(f"Error for {case}: {e}")
What you learned: You can add rich error context to your traces to help with debugging and monitoring.
Part 4: Performance Monitoring
Exercise 7: Resource Usage Tracking
import psutil
import time
@observe()
def resource_intensive_operation():
    """Track resource usage during operation.

    Samples the current process's resident set size (RSS) before and after a
    simulated workload and measures the elapsed wall-clock time.

    Returns:
        A dict with ``items_processed``, ``memory_usage`` (``initial_mb``,
        ``final_mb``, ``delta_mb``) and ``processing_time`` in seconds.
    """
    # Get initial resource state
    process = psutil.Process()
    started_at = time.perf_counter()
    initial_memory = process.memory_info().rss / 1024 / 1024  # MB

    # Perform operation
    data = [f"item_{i}" for i in range(50000)]  # Reduced for tutorial

    # Simulate some processing
    time.sleep(0.5)

    # Get final resource state
    final_memory = process.memory_info().rss / 1024 / 1024  # MB
    # Report the measured duration rather than the nominal sleep time.
    elapsed = time.perf_counter() - started_at

    return {
        "items_processed": len(data),
        "memory_usage": {
            "initial_mb": round(initial_memory, 2),
            "final_mb": round(final_memory, 2),
            "delta_mb": round(final_memory - initial_memory, 2),
        },
        "processing_time": round(elapsed, 2),
    }
# Run the resource tracking
result = resource_intensive_operation()
print(f"Resource usage: {result}")
What you learned: You can monitor resource usage alongside your application logic to identify performance bottlenecks.
Part 5: User and Session Context
Exercise 8: Session Tracking
import uuid
class UserSession:
    """Track user sessions with LangFuse.

    Creating an instance opens a ``user_session`` trace tagged with the
    session and user IDs; each query then records its own trace carrying the
    same session ID.
    """

    def __init__(self, user_id: str, session_id: str = None):
        """Start a session for ``user_id``.

        Args:
            user_id: Identifier of the user owning this session.
            session_id: Existing session ID to reuse; a random UUID4 string
                is generated when omitted or empty.
        """
        self.user_id = user_id
        # Generate a UUID4 session ID when the caller does not supply one
        self.session_id = session_id or str(uuid.uuid4())
        self.trace = langfuse.trace(
            name="user_session",
            session_id=self.session_id,
            user_id=user_id,
            tags=["session", "user_interaction"],
        )

    @observe()
    def process_query(self, query: str) -> str:
        """Process a user query within the session.

        Args:
            query: The user's query text.

        Returns:
            The simulated response string.
        """
        # Simulate query processing
        response = f"Response to: {query}"

        # Add session context - same session ID for all traces in this session
        langfuse.trace(
            name="session_query",
            session_id=self.session_id,
            user_id=self.user_id,
            input={"query": query},
            output={"response": response},
        )

        return response

    @observe()
    def end_session(self):
        """End the user session.

        Returns:
            ``{"session_ended": True}``; the same value is recorded as the
            session trace's output.
        """
        self.trace.update(output={"session_ended": True})
        return {"session_ended": True}
# Try the session tracking with UUID format
session = UserSession("user_123")  # Auto-generates UUID session ID
print(f"Session ID: {session.session_id}")  # Will show UUID format

# Or provide your own session ID
custom_session = UserSession("user_456", str(uuid.uuid4()))
print(f"Custom Session ID: {custom_session.session_id}")
What you learned: You can track user sessions and group related traces together for better analysis.
Part 6: Advanced Features
Exercise 9: Conditional Tracing
import os
import random
def conditional_trace(func):
    """Decorator that only traces in certain environments.

    The ``ENVIRONMENT`` variable is read on every call, so tracing can be
    toggled without re-importing the module. The traced variant of ``func``
    is built on first use and then reused, instead of re-decorating the
    function on each call.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same name, docstring and call signature as
        ``func``.
    """
    import functools

    traced_func = None  # Built lazily the first time tracing is enabled.

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        nonlocal traced_func
        # Only trace in development or staging
        if os.getenv("ENVIRONMENT") in ("development", "staging"):
            if traced_func is None:
                traced_func = observe()(func)
            return traced_func(*args, **kwargs)
        return func(*args, **kwargs)

    return wrapper
@conditional_trace
def debug_function():
    """Function that's only traced in non-production environments.

    Returns:
        A dict with ``debug`` set to ``True`` and ``environment`` set to the
        ``ENVIRONMENT`` variable, or ``"unknown"`` when it is unset.
    """
    return {"debug": True, "environment": os.getenv("ENVIRONMENT", "unknown")}
# Sampling-based tracing
def _sampled_observe(sample_rate: float):
    """Return a decorator that traces roughly ``sample_rate`` of all calls.

    ``observe()`` has no per-function ``sample_rate`` argument, so the
    sampling decision is made here: each call is routed either to the traced
    variant of the function or to the plain function.

    Args:
        sample_rate: Fraction of calls to trace, between 0.0 and 1.0.
    """
    import functools

    def decorator(func):
        traced_func = observe()(func)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # random.random() is uniform on [0.0, 1.0)
            if random.random() < sample_rate:
                return traced_func(*args, **kwargs)
            return func(*args, **kwargs)

        return wrapper

    return decorator


@_sampled_observe(sample_rate=0.5)  # Only trace 50% of calls
def high_frequency_function():
    """Function called very frequently.

    Returns:
        A dict with ``processed`` set to ``True`` and the current
        ``timestamp`` from ``time.time()``.
    """
    return {"processed": True, "timestamp": time.time()}
# Test conditional tracing
print("Testing conditional tracing:")
result = debug_function()
print(f"Debug function result: {result}")

print("\nTesting sampling (run multiple times to see sampling effect):")
for i in range(5):
    result = high_frequency_function()
    print(f"Call {i+1}: {result}")
What you learned: You can control when tracing happens based on environment variables or sampling rates.
Summary
Congratulations! You’ve learned the essential patterns for Python tracing:
- ✅ Decorator-based tracing - The easiest way to add tracing
- ✅ Manual tracing - For complex workflows requiring precise control
- ✅ Error handling - Automatic and custom error capture
- ✅ Performance monitoring - Resource usage tracking
- ✅ Session management - User and session context
- ✅ Advanced features - Conditional tracing and sampling
Practice Exercises
Try these exercises to reinforce your learning:
- Create a text processing pipeline that uses both decorators and manual tracing
- Add error handling to a function that processes user input
- Implement session tracking for a simple chatbot
- Monitor resource usage in a data processing function
- Set up conditional tracing based on user roles
What’s Next?
Now that you understand the fundamentals, explore these guides:
- Python SDK - Learn the full SDK capabilities
- Raw Requests - Use HTTP requests directly
- OpenTelemetry - Implement standardized tracing
- Azure Deployment - Deploy your own LangFuse instance
🚀 You’re ready to implement comprehensive tracing in your applications!