Store MLflow Artifacts in S3
You can configure MLflow to store experiment artifacts such as models, images, and logs in an Amazon S3 bucket for scalable and durable storage. This is especially useful in distributed environments where multiple users or systems access the same artifacts.
Access an S3 Object Using the AWS CLI
Follow these steps to access, download, or view an image stored in S3, for example: s3://mlflow-artifacts-hv/my-folder/image.png.
- Verify AWS CLI installation
aws --version
# If not installed:
# On Ubuntu: sudo apt install awscli
# On Mac: brew install awscli
- Configure the AWS CLI (if not already configured).
aws configure
Provide the following details when prompted:
- AWS Access Key ID
- AWS Secret Access Key
- Default Region (e.g., us-east-1)
- List files in S3 path.
aws s3 ls s3://mlflow-artifacts-hv/my-folder/
- Download image from S3.
aws s3 cp s3://mlflow-artifacts-hv/my-folder/image.png .
This command downloads the image to your current working directory.
- View the image.
xdg-open image.png # Linux
open image.png # macOS
- (Optional) Generate a pre-signed URL.
If the bucket is private, you can generate a temporary URL to access it in a browser.
aws s3 presign s3://mlflow-artifacts-hv/my-folder/image.png --expires-in 3600
This creates a secure URL valid for 1 hour (3600 seconds).
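If you prefer Python over the CLI, both the download step and the pre-signed URL step above can be done with boto3. A minimal sketch, assuming the example bucket and key from the steps above:

import boto3

s3 = boto3.client("s3")

# Download the image to the current directory (equivalent to aws s3 cp).
s3.download_file("mlflow-artifacts-hv", "my-folder/image.png", "image.png")

# Generate a pre-signed URL valid for 1 hour (equivalent to aws s3 presign).
url = s3.generate_presigned_url(
    "get_object",
    Params={"Bucket": "mlflow-artifacts-hv", "Key": "my-folder/image.png"},
    ExpiresIn=3600,
)
print(url)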
Example Use Cases
Task | Command
---|---
List bucket | aws s3 ls s3://mlflow-artifacts-hv/
List folder | aws s3 ls s3://mlflow-artifacts-hv/my-folder/
Download image | aws s3 cp s3://mlflow-artifacts-hv/my-folder/image.png .
Generate pre-signed URL | aws s3 presign s3://mlflow-artifacts-hv/yourfile
The example below shows how to configure an MLflow server to store artifacts in S3 and then access them with the AWS CLI. First, export your AWS credentials and start the server with an S3 --default-artifact-root:
export AWS_ACCESS_KEY_ID={}
export AWS_SECRET_ACCESS_KEY={}
export AWS_DEFAULT_REGION=us-east-1
mlflow server \
  --backend-store-uri mysql+pymysql://mlflow:mlflow@10.100.11.70:3306/mlflow \
  --default-artifact-root s3://ad-odp/mlflow-artifacts-hv/ \
  --host 0.0.0.0 --port 5000
Install boto3 so that MLflow can read and write S3; clients that log artifacts directly to S3 typically need boto3 and valid AWS credentials as well:
pip install boto3
The following training script logs parameters, metrics, a plot, and a model to the S3-backed artifact store:
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import os

# Set tracking URI and experiment
mlflow.set_tracking_uri("http://10.100.11.72:5000")
mlflow.set_experiment("rf-diabetes-s3")

# Load dataset
data = load_diabetes()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name="target")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Start MLflow run
with mlflow.start_run(run_name="rf-diabetes-model") as run:
    # Train model
    model = RandomForestRegressor(n_estimators=100, max_depth=6, random_state=42)
    model.fit(X_train, y_train)

    # Predict and evaluate
    preds = model.predict(X_test)
    mse = mean_squared_error(y_test, preds)
    r2 = r2_score(y_test, preds)

    # Log params and metrics
    mlflow.log_param("n_estimators", 100)
    mlflow.log_param("max_depth", 6)
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("r2", r2)

    # Save model file and log it as an artifact
    model_path = "rf_model.pkl"
    joblib.dump(model, model_path)
    mlflow.log_artifact(model_path)

    # Save and log a plot
    plt.figure()
    plt.scatter(y_test, preds)
    plt.xlabel("Actual")
    plt.ylabel("Predicted")
    plt.title("Actual vs Predicted")
    plot_path = "actual_vs_predicted.png"
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)

    # Log the model with an input example and inferred signature
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="sklearn-model",
        input_example=X_test[:5],
        signature=mlflow.models.signature.infer_signature(X_test, preds),
    )

print(f"Run complete: {run.info.run_id}")
print(f"UI: http://10.100.11.72:5000/#/experiments/{run.info.experiment_id}/runs/{run.info.run_id}")

After a run completes, you can browse the artifact layout with the AWS CLI. In the listings below, PRE marks a key prefix (a "folder"); it is part of the CLI output, not of the object key, which is why appending it to the path fails:
[root@kafkaingestion2 ~]# aws s3 ls s3://ad-odp/mlflow-artifacts-hv/
PRE 19/
PRE 20/
2025-07-07 15:06:14 0
2025-07-07 15:08:11 203887 fil1-bk.png
[root@kafkaingestion2 ~]#
[root@kafkaingestion2 ~]# aws s3 ls s3://ad-odp/mlflow-artifacts-hv/PRE 20
Unknown options: 20
[root@kafkaingestion2 ~]# aws s3 ls "s3://ad-odp/mlflow-artifacts-hv/PRE 20"
[root@kafkaingestion2 ~]# aws s3 ls s3://ad-odp/mlflow-artifacts-hv/20/
PRE 24ddf859eca043b6929b0c3fb93a3869/
PRE models/
[root@kafkaingestion2 ~]#
[root@kafkaingestion2 ~]# aws s3 ls s3://ad-odp/mlflow-artifacts-hv/20/models
PRE models/
[root@kafkaingestion2 ~]# aws s3 ls s3://ad-odp/mlflow-artifacts-hv/20/models/
PRE m-332f7c62773949ae94cddee5af8b810a/
[root@kafkaingestion2 ~]# aws s3 ls s3://ad-odp/mlflow-artifacts-hv/20/models/m-332f7c62773949ae94cddee5af8b810a/
PRE artifacts/
[root@kafkaingestion2 ~]# aws s3 ls s3://ad-odp/mlflow-artifacts-hv/20/models/m-332f7c62773949ae94cddee5af8b810a/artifacts
PRE artifacts/
[root@kafkaingestion2 ~]# aws s3 ls s3://ad-odp/mlflow-artifacts-hv/20/models/m-332f7c62773949ae94cddee5af8b810a/artifacts/
2025-07-07 15:50:11 1559 MLmodel
2025-07-07 15:50:11 211 conda.yaml
2025-07-07 15:50:06 1183 input_example.json
2025-07-07 15:50:07 731401 model.pkl
2025-07-07 15:50:12 115 python_env.yaml
2025-07-07 15:50:12 93 requirements.txt
2025-07-07 15:50:07 1762 serving_input_example.json
[root@kafkaingestion2 ~]#
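The same layout can be inspected programmatically with boto3. A minimal sketch, assuming the bucket and prefix above; list_objects_v2 with Delimiter="/" returns the "folders" as CommonPrefixes, which is what the CLI renders as PRE:

import boto3

s3 = boto3.client("s3")
resp = s3.list_objects_v2(
    Bucket="ad-odp",
    Prefix="mlflow-artifacts-hv/",
    Delimiter="/",
)

# Key prefixes ("folders") -- shown as PRE by the AWS CLI
for p in resp.get("CommonPrefixes", []):
    print("PRE", p["Prefix"])

# Objects directly under the prefix
for obj in resp.get("Contents", []):
    print(obj["LastModified"], obj["Size"], obj["Key"])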


cat localtest.py
import mlflow
import os

# Optional: set the MLflow tracking URI (only needed if not default localhost:5000)
mlflow.set_tracking_uri("http://10.100.11.39:5000")

# Start an experiment (or get the existing one)
experiment_name = "artifact_test"
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name="simple_artifact_test") as run:
    # Log a parameter
    mlflow.log_param("param1", 123)

    # Log a metric
    mlflow.log_metric("accuracy", 0.95)

    # Log an artifact (e.g., a simple text file)
    os.makedirs("artifacts", exist_ok=True)
    with open("artifacts/output.txt", "w") as f:
        f.write("This is a test artifact for MLflow.")

    mlflow.log_artifacts("artifacts")

print(f"Run ID: {run.info.run_id}")
print("Check the MLflow UI or artifact location for 'output.txt'.")
cat hdfstest.py
import mlflow
import os

# Optional: set the tracking URI if you're not using the default (e.g., a remote MLflow server)
mlflow.set_tracking_uri("http://10.100.11.39:5000")

# Create or load an experiment
experiment_name = "hdfs_artifact_test"
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name="hdfs_debug_run") as run:
    print(f"Experiment Name: {experiment_name}")
    print(f"Run ID: {run.info.run_id}")
    print(f"Artifact URI: {mlflow.get_artifact_uri()}")

    # Log sample param and metric
    mlflow.log_param("framework", "mlflow")
    mlflow.log_metric("accuracy", 0.99)

    # Create and log a test artifact
    os.makedirs("artifacts", exist_ok=True)
    artifact_file = "artifacts/test_hdfs_artifact.txt"
    with open(artifact_file, "w") as f:
        f.write("This is a test file to verify HDFS artifact logging.")

    mlflow.log_artifacts("artifacts")

    print("\nArtifact log complete.")
    print("Check the above HDFS path or run this in your shell:")
    # hdfs dfs accepts full hdfs:// URIs, so pass the artifact URI through unchanged
    print(f"hdfs dfs -ls -R {mlflow.get_artifact_uri()}")
cat s3test.py
import mlflow
import os

# Optional: set MLflow tracking URI if not default
mlflow.set_tracking_uri("http://10.100.11.39:5000")

# Set the experiment name
experiment_name = "s3_artifact_test"
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name="s3_debug_run") as run:
    print(f"Experiment Name: {experiment_name}")
    print(f"Run ID: {run.info.run_id}")
    print(f"Artifact URI: {mlflow.get_artifact_uri()}")

    # Log param and metric
    mlflow.log_param("storage", "s3")
    mlflow.log_metric("accuracy", 0.92)

    # Write and log a test artifact
    os.makedirs("artifacts", exist_ok=True)
    with open("artifacts/s3_test_file.txt", "w") as f:
        f.write("S3 artifact upload test for bucket ad-odp/mlflow-artifacts-hv.")

    mlflow.log_artifacts("artifacts")

    print("\nArtifact logged successfully.")
    print("Check it via AWS CLI:")
    print("aws s3 ls s3://ad-odp/mlflow-artifacts-hv/ --recursive")