Examples and Tutorials
This guide provides practical examples and step-by-step tutorials for using the SPE9 Geomodeling Toolkit.
π Basic Examples
Example 1: Simple GP Modeling
from spe9_geomodeling import UnifiedSPE9Toolkit, load_spe9_data
# Load data
data = load_spe9_data() # Uses default SPE9 path
toolkit = UnifiedSPE9Toolkit()
toolkit.load_spe9_data(data)
# Create train/test split
X_train, X_test, y_train, y_test = toolkit.create_train_test_split(test_size=0.2)
# Train RBF kernel GP
model = toolkit.create_sklearn_model('gpr', kernel_type='rbf')
toolkit.train_sklearn_model(model, 'rbf_gpr')
# Evaluate
results = toolkit.evaluate_model('rbf_gpr', X_test, y_test)
print(f"RBF GP RΒ² Score: {results.r2:.4f}")
Example 2: Model Comparison
from spe9_geomodeling import UnifiedSPE9Toolkit
import matplotlib.pyplot as plt
toolkit = UnifiedSPE9Toolkit()
toolkit.load_synthetic_data(grid_size=(30, 30, 5)) # Synthetic for demo
X_train, X_test, y_train, y_test = toolkit.create_train_test_split()
# Train multiple models
models = {
'RBF GP': ('gpr', {'kernel_type': 'rbf'}),
'MatΓ©rn GP': ('gpr', {'kernel_type': 'matern'}),
'Combined GP': ('gpr', {'kernel_type': 'combined'}),
'Random Forest': ('rf', {'n_estimators': 100})
}
results = {}
for name, (model_type, params) in models.items():
model = toolkit.create_sklearn_model(model_type, **params)
toolkit.train_sklearn_model(model, name.lower().replace(' ', '_'))
results[name] = toolkit.evaluate_model(name.lower().replace(' ', '_'), X_test, y_test)
# Plot comparison
names = list(results.keys())
r2_scores = [results[name].r2 for name in names]
plt.figure(figsize=(10, 6))
plt.bar(names, r2_scores)
plt.ylabel('RΒ² Score')
plt.title('Model Performance Comparison')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
Example 3: Deep GP Experiment
from spe9_geomodeling import DeepGPExperiment
# Run comprehensive experiment
experiment = DeepGPExperiment()
results = experiment.run_comparison_experiment(
train_samples=800,
test_samples=200,
training_iterations=75
)
# Analyze results
print("Deep GP Experiment Results:")
print("-" * 50)
for model_name, result in results.items():
metrics = result['metrics']
time = result['training_time']
print(f"{model_name:20s} | RΒ²: {metrics['r2_score']:.4f} | "
f"RMSE: {metrics['rmse']:.2f} | Time: {time:.1f}s")
# Find best model
best_model = max(results.keys(), key=lambda x: results[x]['metrics']['r2_score'])
print(f"\nBest performing model: {best_model}")
π Advanced Examples
Example 4: Custom Kernel Development
from sklearn.gaussian_process.kernels import Kernel
import numpy as np
class CustomSpatialKernel(Kernel):
"""Custom kernel for geological data with anisotropy."""
def __init__(self, length_scale_x=1.0, length_scale_y=1.0, length_scale_z=1.0):
self.length_scale_x = length_scale_x
self.length_scale_y = length_scale_y
self.length_scale_z = length_scale_z
def __call__(self, X, Y=None, eval_gradient=False):
if Y is None:
Y = X
# Anisotropic distance calculation
X_scaled = X / np.array([self.length_scale_x, self.length_scale_y, self.length_scale_z])
Y_scaled = Y / np.array([self.length_scale_x, self.length_scale_y, self.length_scale_z])
dists = np.sum((X_scaled[:, None] - Y_scaled[None, :]) ** 2, axis=2)
K = np.exp(-0.5 * dists)
if eval_gradient:
# Gradient computation...
return K, np.zeros((X.shape[0], Y.shape[0], 3))
return K
# Use custom kernel
from sklearn.gaussian_process import GaussianProcessRegressor
custom_kernel = CustomSpatialKernel(length_scale_x=2.0, length_scale_y=2.0, length_scale_z=0.5)
model = GaussianProcessRegressor(kernel=custom_kernel)
model.fit(X_train, y_train)
Example 5: Uncertainty Quantification
from spe9_geomodeling import UnifiedSPE9Toolkit
import numpy as np
import matplotlib.pyplot as plt
toolkit = UnifiedSPE9Toolkit()
toolkit.load_spe9_data(load_spe9_data())
# Train GP model
X_train, X_test, y_train, y_test = toolkit.create_train_test_split()
model = toolkit.create_sklearn_model('gpr', kernel_type='combined')
toolkit.train_sklearn_model(model, 'combined_gpr')
# Get predictions with uncertainty
predictions, uncertainties = toolkit.predict_with_uncertainty('combined_gpr', X_test)
# Analyze uncertainty
print("Uncertainty Analysis:")
print(f"Mean uncertainty: {np.mean(uncertainties):.4f}")
print(f"Max uncertainty: {np.max(uncertainties):.4f}")
print(f"Min uncertainty: {np.min(uncertainties):.4f}")
# Plot prediction vs uncertainty
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(y_test, predictions, alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.title('Predictions vs True Values')
plt.subplot(1, 2, 2)
plt.scatter(predictions, uncertainties, alpha=0.6)
plt.xlabel('Predictions')
plt.ylabel('Uncertainty')
plt.title('Prediction Uncertainty')
plt.tight_layout()
plt.show()
Example 6: Hyperparameter Optimization
from spe9_geomodeling import UnifiedSPE9Toolkit
from sklearn.model_selection import GridSearchCV
from sklearn.gaussian_process.kernels import RBF, Matern
import numpy as np
toolkit = UnifiedSPE9Toolkit()
toolkit.load_synthetic_data()
X_train, X_test, y_train, y_test = toolkit.create_train_test_split()
# Define parameter grid
param_grid = {
'alpha': [1e-10, 1e-8, 1e-6],
'kernel__length_scale': np.logspace(-2, 2, 5),
'kernel__k1__length_scale': np.logspace(-2, 2, 3), # For combined kernels
'kernel__k2__length_scale': np.logspace(-2, 2, 3),
}
# Create base model
base_model = toolkit.create_sklearn_model('gpr', kernel_type='combined')
# Grid search
grid_search = GridSearchCV(
base_model,
param_grid,
cv=5,
scoring='r2',
n_jobs=-1,
verbose=1
)
print("Running hyperparameter optimization...")
grid_search.fit(X_train, y_train)
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best CV score: {grid_search.best_score_:.4f}")
# Evaluate on test set
test_score = grid_search.score(X_test, y_test)
print(f"Test set RΒ² score: {test_score:.4f}")
π¬ Research Examples
Example 7: Spatial Cross-Validation
from sklearn.model_selection import GroupKFold
import numpy as np
def create_spatial_groups(X, n_groups=5):
"""Create spatial groups for cross-validation."""
# Simple spatial grouping based on x-coordinate
x_coords = X[:, 0]
group_boundaries = np.percentile(x_coords, np.linspace(0, 100, n_groups + 1))
groups = np.digitize(x_coords, group_boundaries) - 1
return np.clip(groups, 0, n_groups - 1)
# Load data
toolkit = UnifiedSPE9Toolkit()
toolkit.load_spe9_data(load_spe9_data())
X_train, X_test, y_train, y_test = toolkit.create_train_test_split()
# Create spatial groups
spatial_groups = create_spatial_groups(X_train, n_groups=5)
# Spatial cross-validation
group_kfold = GroupKFold(n_splits=5)
model = toolkit.create_sklearn_model('gpr', kernel_type='combined')
spatial_scores = []
for train_idx, val_idx in group_kfold.split(X_train, y_train, groups=spatial_groups):
X_fold_train, X_fold_val = X_train[train_idx], X_train[val_idx]
y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]
model.fit(X_fold_train, y_fold_train)
score = model.score(X_fold_val, y_fold_val)
spatial_scores.append(score)
print(f"Spatial CV scores: {spatial_scores}")
print(f"Mean spatial CV score: {np.mean(spatial_scores):.4f} Β± {np.std(spatial_scores):.4f}")
Example 8: Multi-Property Modeling
# Simulate multi-property data
def create_multi_property_data(X):
"""Create correlated properties (PERMX, PORO, NTG)."""
np.random.seed(42)
# Base permeability
permx = np.exp(np.random.normal(5, 2, len(X)))
# Correlated porosity
poro = 0.3 * np.log(permx) + np.random.normal(0, 0.05, len(X))
poro = np.clip(poro, 0.05, 0.35)
# Net-to-gross ratio
ntg = 0.8 + 0.1 * np.log(permx) / np.max(np.log(permx)) + np.random.normal(0, 0.1, len(X))
ntg = np.clip(ntg, 0.3, 1.0)
return {'PERMX': permx, 'PORO': poro, 'NTG': ntg}
# Multi-property modeling
toolkit = UnifiedSPE9Toolkit()
toolkit.load_synthetic_data()
X_train, X_test, y_train, y_test = toolkit.create_train_test_split()
# Create multi-property targets
properties = create_multi_property_data(X_train)
# Train separate models for each property
models = {}
results = {}
for prop_name, prop_values in properties.items():
print(f"\nTraining model for {prop_name}...")
model = toolkit.create_sklearn_model('gpr', kernel_type='combined')
model.fit(X_train, prop_values)
models[prop_name] = model
# Evaluate on test set
test_properties = create_multi_property_data(X_test)
test_score = model.score(X_test, test_properties[prop_name])
results[prop_name] = test_score
print(f"{prop_name} RΒ² score: {test_score:.4f}")
# Cross-property correlation analysis
import matplotlib.pyplot as plt
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
prop_names = list(properties.keys())
for i, (prop1, prop2) in enumerate([(0,1), (0,2), (1,2)]):
axes[i].scatter(properties[prop_names[prop1]], properties[prop_names[prop2]], alpha=0.6)
axes[i].set_xlabel(prop_names[prop1])
axes[i].set_ylabel(prop_names[prop2])
# Calculate correlation
corr = np.corrcoef(properties[prop_names[prop1]], properties[prop_names[prop2]])[0,1]
axes[i].set_title(f'Correlation: {corr:.3f}')
plt.tight_layout()
plt.show()
π¨ Visualization Examples
Example 9: Advanced Plotting
from spe9_geomodeling import SPE9Plotter
import matplotlib.pyplot as plt
import numpy as np
# Load and prepare data
toolkit = UnifiedSPE9Toolkit()
data = load_spe9_data()
toolkit.load_spe9_data(data)
# Train model
X_train, X_test, y_train, y_test = toolkit.create_train_test_split()
model = toolkit.create_sklearn_model('gpr', kernel_type='combined')
toolkit.train_sklearn_model(model, 'combined_gpr')
# Get full grid predictions
predictions = toolkit.predict_full_grid('combined_gpr')
# Create plotter
plotter = SPE9Plotter()
# Plot original vs predicted slices
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
# Original data slices
original_data = data['properties']['PERMX'].reshape(data['grid_shape'])
slice_indices = [5, 12, 20] # Different Z slices
for i, z_idx in enumerate(slice_indices):
# Original
im1 = axes[0, i].imshow(original_data[:, :, z_idx], cmap='viridis', origin='lower')
axes[0, i].set_title(f'Original PERMX - Z={z_idx}')
plt.colorbar(im1, ax=axes[0, i])
# Predicted
pred_data = predictions.reshape(data['grid_shape'])
im2 = axes[1, i].imshow(pred_data[:, :, z_idx], cmap='viridis', origin='lower')
axes[1, i].set_title(f'Predicted PERMX - Z={z_idx}')
plt.colorbar(im2, ax=axes[1, i])
plt.tight_layout()
plt.show()
Example 10: Interactive Visualization
# Requires: pip install plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
def create_interactive_3d_plot(data, predictions):
"""Create interactive 3D visualization."""
# Sample points for visualization (full grid would be too dense)
n_points = 1000
indices = np.random.choice(len(data), n_points, replace=False)
x, y, z = data[indices, 0], data[indices, 1], data[indices, 2]
values = predictions[indices]
fig = go.Figure(data=go.Scatter3d(
x=x, y=y, z=z,
mode='markers',
marker=dict(
size=3,
color=values,
colorscale='Viridis',
colorbar=dict(title="PERMX"),
opacity=0.8
),
text=[f'PERMX: {v:.2f}' for v in values],
hovertemplate='X: %{x}<br>Y: %{y}<br>Z: %{z}<br>%{text}<extra></extra>'
))
fig.update_layout(
title='3D Permeability Distribution',
scene=dict(
xaxis_title='X',
yaxis_title='Y',
zaxis_title='Z'
),
width=800,
height=600
)
return fig
# Create interactive plot
if 'predictions' in locals():
fig = create_interactive_3d_plot(X_test, predictions[:len(X_test)])
fig.show()
π§ Integration Examples
Example 11: Custom Pipeline
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# Create custom preprocessing pipeline
def create_geomodeling_pipeline():
"""Create a complete geomodeling pipeline."""
pipeline = Pipeline([
('scaler', StandardScaler()),
('pca', PCA(n_components=0.95)), # Keep 95% of variance
('gpr', toolkit.create_sklearn_model('gpr', kernel_type='combined'))
])
return pipeline
# Use pipeline
toolkit = UnifiedSPE9Toolkit()
toolkit.load_synthetic_data()
X_train, X_test, y_train, y_test = toolkit.create_train_test_split()
pipeline = create_geomodeling_pipeline()
pipeline.fit(X_train, y_train)
# Evaluate
score = pipeline.score(X_test, y_test)
print(f"Pipeline RΒ² score: {score:.4f}")
# Analyze components
print(f"PCA components retained: {pipeline.named_steps['pca'].n_components_}")
print(f"Explained variance ratio: {pipeline.named_steps['pca'].explained_variance_ratio_.sum():.4f}")
Example 12: Export and Visualization
from spe9_geomodeling import GRDECLParser
# Complete workflow with export
def complete_geomodeling_workflow(input_file, output_prefix):
"""Complete workflow from GRDECL input to results export."""
# Load data
parser = GRDECLParser(input_file)
data = parser.load_data()
# Set up toolkit
toolkit = UnifiedSPE9Toolkit()
toolkit.load_spe9_data(data)
# Train model
X_train, X_test, y_train, y_test = toolkit.create_train_test_split()
model = toolkit.create_sklearn_model('gpr', kernel_type='combined')
toolkit.train_sklearn_model(model, 'final_model')
# Evaluate
results = toolkit.evaluate_model('final_model', X_test, y_test)
print(f"Model Performance: RΒ² = {results.r2:.4f}, RMSE = {results.rmse:.2f}")
# Full grid prediction
predictions = toolkit.predict_full_grid('final_model')
uncertainties = toolkit.predict_uncertainty('final_model')
# Export results
toolkit.export_to_grdecl(predictions, f"{output_prefix}_predictions.grdecl")
toolkit.export_to_grdecl(uncertainties, f"{output_prefix}_uncertainty.grdecl")
# Create visualizations
toolkit.plot_results('final_model', save_path=f"{output_prefix}_results.png")
return results, predictions, uncertainties
# Run complete workflow
# results, preds, uncert = complete_geomodeling_workflow('SPE9.GRDECL', 'spe9_output')
π‘ Tips and Best Practices
Performance Tips
# For large datasets
toolkit = UnifiedSPE9Toolkit()
toolkit.load_spe9_data(data)
# Use smaller training sets
X_train, X_test, y_train, y_test = toolkit.create_train_test_split(
train_size=0.1, # Use only 10% for training
test_size=0.05 # 5% for testing
)
# Use sparse GP for very large datasets
if len(X_train) > 5000:
model = toolkit.create_gpytorch_model('sparse', inducing_points=500)
else:
model = toolkit.create_sklearn_model('gpr', kernel_type='rbf')
Memory Management
import gc
import torch
# Clear GPU memory (if using GPyTorch)
if torch.cuda.is_available():
torch.cuda.empty_cache()
# Clear Python memory
gc.collect()
# Monitor memory usage
import psutil
print(f"Memory usage: {psutil.virtual_memory().percent:.1f}%")
Error Handling
from spe9_geomodeling import SPE9DataError, ModelTrainingError
try:
toolkit = UnifiedSPE9Toolkit()
data = load_spe9_data()
toolkit.load_spe9_data(data)
# Training...
model = toolkit.create_sklearn_model('gpr', kernel_type='combined')
toolkit.train_sklearn_model(model, 'test_model')
except SPE9DataError as e:
print(f"Data loading error: {e}")
# Fallback to synthetic data
toolkit.load_synthetic_data()
except ModelTrainingError as e:
print(f"Training error: {e}")
# Try simpler model
model = toolkit.create_sklearn_model('gpr', kernel_type='rbf')
except Exception as e:
print(f"Unexpected error: {e}")
# Log error and continue with default settings
Next Steps:
- Explore Deep GP Guide for advanced modeling techniques
- Check API Reference for complete function documentation
- See Model Comparison Guide for choosing the right approach