# This makefile demos monitoring for the random ML model.
#
# Setup:
# - To run *locally*, store a git-ignored `.env` secrets file in the repo root.
#	It should export the following MongoDB environment variables:
# 		export MONGODB_USERNAME=??
# 		export MONGODB_PASSWORD=??
# 		export MONGODB_URL="??"
#   Notes:
#		- Put the URL in "quotes" to ensure that special characters aren't
#			interpreted by the shell.
# - To run on *codespaces*, add the secrets for the three variables listed above
#		to your repo under the repo's settings in GitHub.
#   Notes:
#   	- Don't quote the MONGODB_URL value in the GitHub secrets.
#   	- Because Codespaces IP addresses are dynamic and can and do change,
#			you'll need to whitelist the Codespace's external IP address under
#			Network Access on your MongoDB Cloud.
#   	- Find your Codespace's IP by running one of these in the terminal:
#			`curl icanhazip.com`
#			`curl ifconfig.me ; echo`
#
# To run it, do these steps in the CLI:
# 1. `make mlflow` to start the MLflow server used to log model performance.
# 2. `make` (in a second CLI) to run the data/ML engineering pipeline.
# 2a. Open the MLFlow UI, navigate to the tagifai_sgd model, and add an
#		alias "@prod" to the latest version.
# 3. `make rest` to start the REST API server;
# 		see `src/api.py` for the API specification.
# 4. Test the REST API using cURL in a third CLI;
# 		see `src/api.py` for sample cURL commands.
#
# Notes:
# - The dataset is now stored in a MongoDB collection. Build the collection
#		using `pymongo_load.py`; read it using `pymongo_read.py`.
# - I installed 'pymongo[srv]' as specified in the guide.
#     - https://www.mongodb.com/docs/guides/
#
# Run recipes under bash: the recipes in this file call `source`.
SHELL := /bin/bash

# Project directory layout, all rooted at BASE_DIR.
BASE_DIR := .
DATA_DIR := $(BASE_DIR)/data
SRC_DIR := $(BASE_DIR)/src
MODELS_DIR := $(BASE_DIR)/models
LOGS_DIR := $(BASE_DIR)/logs

# Pipeline input (dataset) and output (trained model) files.
DATASET := $(DATA_DIR)/dataset.csv
MODEL_RANDOM := $(MODELS_DIR)/model_random.pkl

# Address of the MLflow server; MLFLOW_URI is assembled from host and port.
MLFLOW_DOMAIN := localhost
MLFLOW_PORT := 8080
MLFLOW_URI := http://$(MLFLOW_DOMAIN):$(MLFLOW_PORT)

# Arbitrarily chosen ending date for training records.
# The double quotes are part of the value and are consumed by the shell
# when the variable is expanded inside a recipe.
TRAIN_DATE := "2020-10-01"

# Default goal (first target in the file): build the trained random model.
# Declared phony so that a stray file named `ALL` in the repo root can never
# make this goal look up to date.
.PHONY: ALL
ALL: $(MODEL_RANDOM)

# Create the output directories on demand.
# `$@` expands to the rule's own target, so the directory that gets created
# always follows LOGS_DIR / MODELS_DIR (and therefore BASE_DIR) instead of a
# hard-coded name that could drift from the variables.
$(LOGS_DIR) $(MODELS_DIR):
	mkdir -p $@

# Train the random model by running train_model.py, passing it the dataset,
# the output model path, the training cut-off date, and the MLflow URI.
#
# Prerequisites:
# - $(DATASET) is a normal prerequisite: a newer dataset triggers retraining.
# - The directories after `|` are order-only prerequisites: they must exist
#   before the recipe runs, but their timestamps (which change whenever a file
#   is added inside them) never force a rebuild.
#
# Secrets:
# - If .env exists (locally), source it to set the MongoDB secrets.
# - If it doesn't exist (codespaces), assume they are set in the environment.
# - In the shell, `&&` and `||` have equal precedence and associate left to
#   right, so the first recipe line reads as
#   `(([ -f .env ] && source .env) || true) && python ...`;
#   the training step therefore runs whether or not .env is present.
$(MODEL_RANDOM): $(DATASET) | $(MODELS_DIR) $(LOGS_DIR)
	[ -f .env ] && source .env || true && \
	python $(SRC_DIR)/train_model.py \
		--dataset-input-filename $< \
		--model-output-filename $@ \
		--model-train-date $(TRAIN_DATE) \
		--mlflow-uri $(MLFLOW_URI)

# Delete the trained model file so that the next `make` rebuilds it.
# `clean` names an action, not a file, hence the phony declaration.
clean:
	rm -rf $(MODEL_RANDOM)
.PHONY: clean


# Preconfigured ML environment startup commands.

# Start the MLflow server on MLFLOW_DOMAIN:MLFLOW_PORT.
.PHONY: mlflow
mlflow:
	mlflow server --host $(MLFLOW_DOMAIN) --port $(MLFLOW_PORT)

# Serve the REST API (src.api:app) with uvicorn on localhost:8000 with
# auto-reload enabled. As in the training rule, .env is sourced first when it
# exists; MLFLOW_TRACKING_URI is set only for the uvicorn process.
.PHONY: rest
rest:
	[ -f .env ] && source .env || true && \
	MLFLOW_TRACKING_URI=$(MLFLOW_URI) \
	uvicorn src.api:app --host localhost --port 8000 --reload

# NOTE(review): `mongodb_env` is declared phony but no rule with that name is
# visible in this file - confirm whether it is still needed.
.PHONY: mongodb_env
