#!/usr/bin/env python
# coding=utf-8

__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2023-2025, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"

# Standard library imports
import os
import sys
import json
import time
import logging

# Networking and URL handling imports
import requests
from urllib.parse import urlencode
import urllib3

# Disable insecure request warnings for urllib3
# (the REST calls issued from this module are made with verify=False)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home: read from the environment at import time, a KeyError is raised if SPLUNK_HOME is not set
splunkhome = os.environ["SPLUNK_HOME"]

# append the TrackMe lib directory to sys.path so that the trackme_libs* modules imported below can be resolved
sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib"))

# import trackme libs
from trackme_libs import run_splunk_search

# import trackme libs utils
from trackme_libs_utils import remove_leading_spaces, escape_backslash

# logging:
# To avoid overriding logging destination of callers, the libs will not set on purpose any logging definition
# and rely on callers themselves


def _period_calculation_to_seconds(period_calculation):
    """Convert a negative relative time expression (such as ``-30d``) to seconds.

    Only the leading ``-<number><unit>`` part is interpreted, any snap-to suffix
    (``@d``, ...) is ignored. Months, quarters and years are rounded up
    (31 / 92 / 366 days) so that the result never under-estimates the window.
    An empty or unknown unit is treated as days.

    Returns:
        The number of seconds as an int, or None if the expression is not a
        negative relative expression that can be interpreted.
    """
    expression = str(period_calculation).strip()
    if not expression.startswith("-"):
        return None

    # drop the sign and any snap-to suffix
    expression = expression[1:].split("@", 1)[0]

    # collect the leading digits (ASCII only, so that int() cannot fail)
    digits = ""
    for char in expression:
        if char not in "0123456789":
            break
        digits += char
    if not digits:
        return None

    unit = expression[len(digits) :].strip().lower()

    day = 86400
    unit_aliases = (
        (("s", "sec", "secs", "second", "seconds"), 1),
        (("m", "min", "mins", "minute", "minutes"), 60),
        (("h", "hr", "hrs", "hour", "hours"), 3600),
        (("d", "day", "days"), day),
        (("w", "week", "weeks"), 7 * day),
        (("mon", "month", "months"), 31 * day),
        (("q", "qtr", "qtrs", "quarter", "quarters"), 92 * day),
        (("y", "yr", "yrs", "year", "years"), 366 * day),
    )
    for aliases, seconds in unit_aliases:
        if unit in aliases:
            return int(digits) * seconds

    # empty or unknown unit: keep the historical assumption that the value is in days
    return int(digits) * day


def train_mlmodel(
    service,
    splunkd_uri,
    session_key,
    username,
    tenant_id,
    component,
    object_value,
    key_value,
    tenant_trackme_metric_idx,
    mode,
    entities_outliers,
    entity_outlier,
    entity_outlier_dict,
    model_json_def,
):
    """Train the Machine Learning outliers model of an entity and update its record.

    The function:
      1. resolves the model settings, from ``entity_outlier_dict`` in ``live``
         mode, or from ``model_json_def`` (plus a few record level settings read
         from ``entity_outlier_dict``) in ``simulation`` mode,
      2. drops the period exclusions which are older than the model calculation
         period and builds the matching ``where NOT (...)`` SPL conditions,
      3. deletes the current model lookup file through the splunkd REST API if
         the file exists on disk,
      4. runs the ``fit`` search via ``run_splunk_search``,
      5. stores the generated searches and the model file metadata in
         ``entity_outlier_dict`` and registers it in ``entities_outliers``.

    Args:
        service: handed over as-is to ``run_splunk_search``.
        splunkd_uri: base URI of splunkd, used to build the REST delete URL.
        session_key: Splunk session key used in the Authorization header.
        username: not used by this function.
        tenant_id: tenant identifier, used in searches and log messages.
        component: component suffix, used in ``object_category="splk-<component>"``.
        object_value: value of the ``object`` filter (backslashes are escaped).
        key_value: value of ``_key`` used in the rules access search.
        tenant_trackme_metric_idx: metric index searched by ``mstats``.
        mode: ``live`` or ``simulation``.
        entities_outliers: dict of models, updated with ``entity_outlier_dict``.
        entity_outlier: model identifier, used to name the model lookup.
        entity_outlier_dict: model record (dict), updated in place.
        model_json_def: simulation model definition, as a JSON string or a dict
            (only used in ``simulation`` mode).

    Returns:
        The tuple ``(entities_outliers, entity_outlier, entity_outlier_dict)``.

    Raises:
        Exception: if the settings cannot be extracted (live mode), if
            ``model_json_def`` cannot be loaded, if ``mode`` is not supported or
            if the training search fails.
        KeyError: in simulation mode, if a mandatory setting is missing from
            ``model_json_def``.
    """

    logging.debug("starting function train_mlmodel")

    # Define an header for requests authenticated communications with splunkd
    header = {
        "Authorization": "Splunk %s" % session_key,
        "Content-Type": "application/json",
    }

    # if mode = live
    if mode == "live":
        try:
            is_disabled = entity_outlier_dict["is_disabled"]
            kpi_metric = entity_outlier_dict["kpi_metric"]
            kpi_span = entity_outlier_dict["kpi_span"]
            method_calculation = entity_outlier_dict["method_calculation"]
            density_lowerthreshold = entity_outlier_dict["density_lowerthreshold"]
            density_upperthreshold = entity_outlier_dict["density_upperthreshold"]
            alert_lower_breached = entity_outlier_dict["alert_lower_breached"]
            alert_upper_breached = entity_outlier_dict["alert_upper_breached"]
            period_calculation = entity_outlier_dict["period_calculation"]
            time_factor = entity_outlier_dict["time_factor"]
            perc_min_lowerbound_deviation = entity_outlier_dict[
                "perc_min_lowerbound_deviation"
            ]
            perc_min_upperbound_deviation = entity_outlier_dict[
                "perc_min_upperbound_deviation"
            ]
            min_value_for_lowerbound_breached = entity_outlier_dict.get(
                "min_value_for_lowerbound_breached", 0
            )
            min_value_for_upperbound_breached = entity_outlier_dict.get(
                "min_value_for_upperbound_breached", 0
            )
            static_lower_threshold = entity_outlier_dict.get(
                "static_lower_threshold", None
            )
            static_upper_threshold = entity_outlier_dict.get(
                "static_upper_threshold", None
            )
            period_exclusions = entity_outlier_dict.get("period_exclusions", [])
            # ensure period_exclusions is a list, otherwise set it to an empty list
            if not isinstance(period_exclusions, list):
                period_exclusions = []

            # get the algorithm
            algorithm = entity_outlier_dict.get("algorithm", "DensityFunction")

            # get the boundaries_extraction_macro
            boundaries_extraction_macro = entity_outlier_dict.get(
                "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
            )

            # optional extra parameters for the fit command
            fit_extra_parameters = entity_outlier_dict.get("fit_extra_parameters", None)

            # optional extra parameters for the apply command
            apply_extra_parameters = entity_outlier_dict.get(
                "apply_extra_parameters", None
            )

            # optional period_calculation_latest
            period_calculation_latest = entity_outlier_dict.get(
                "period_calculation_latest", "now"
            )

        except Exception as e:
            msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", entity_outlier="{entity_outlier}", failed to extract one or more expected settings from the entity, is this record corrupted? Exception="{str(e)}"'
            logging.error(msg)
            raise Exception(msg) from e

    elif mode == "simulation":

        # log debug
        logging.debug("mode is simulation")

        # log debug
        logging.debug(f"model_json_def={model_json_def}")

        # load the model definition as a dict, unless it was already submitted as a dict
        if not isinstance(model_json_def, dict):
            try:
                model_json_def = json.loads(model_json_def)
                # log debug
                logging.debug(
                    f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
                )
            except Exception as e:
                msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"'
                logging.error(msg)
                raise Exception(msg) from e

        # get definitions from the model_json_def
        is_disabled = model_json_def["is_disabled"]
        kpi_metric = model_json_def["kpi_metric"]
        kpi_span = model_json_def["kpi_span"]
        method_calculation = model_json_def["method_calculation"]
        density_lowerthreshold = model_json_def["density_lowerthreshold"]
        density_upperthreshold = model_json_def["density_upperthreshold"]
        alert_lower_breached = model_json_def["alert_lower_breached"]
        alert_upper_breached = model_json_def["alert_upper_breached"]
        period_calculation = model_json_def["period_calculation"]
        # optional
        period_calculation_latest = model_json_def.get(
            "period_calculation_latest", "now"
        )
        time_factor = model_json_def["time_factor"]
        perc_min_lowerbound_deviation = model_json_def["perc_min_lowerbound_deviation"]
        perc_min_upperbound_deviation = model_json_def["perc_min_upperbound_deviation"]
        min_value_for_lowerbound_breached = model_json_def.get(
            "min_value_for_lowerbound_breached", 0
        )
        min_value_for_upperbound_breached = model_json_def.get(
            "min_value_for_upperbound_breached", 0
        )
        static_lower_threshold = model_json_def.get("static_lower_threshold", None)
        static_upper_threshold = model_json_def.get("static_upper_threshold", None)

        # period exclusions is an exception and is defined at the level of the model KVstore record
        period_exclusions = entity_outlier_dict.get("period_exclusions", [])
        # ensure period_exclusions is a list, otherwise set it to an empty list
        if not isinstance(period_exclusions, list):
            period_exclusions = []

        # NOTE(review): the settings below are read from the record rather than from
        # model_json_def (return_lightsimulation_search reads them from model_json_def),
        # confirm this is intended

        # get the algorithm
        algorithm = entity_outlier_dict.get("algorithm", "DensityFunction")

        # get the boundaries_extraction_macro
        boundaries_extraction_macro = entity_outlier_dict.get(
            "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
        )

        # optional extra parameters for the fit command
        fit_extra_parameters = entity_outlier_dict.get("fit_extra_parameters", None)

        # optional extra parameters for the apply command
        apply_extra_parameters = entity_outlier_dict.get("apply_extra_parameters", None)

    else:
        # fail explicitly rather than with an UnboundLocalError further down
        msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", entity_outlier="{entity_outlier}", unsupported mode="{mode}", valid options are: live | simulation'
        logging.error(msg)
        raise Exception(msg)

    # summary of the effective settings, for debugging purposes only
    rules_summary = {
        "is_disabled": is_disabled,
        "kpi_metric": kpi_metric,
        "kpi_span": kpi_span,
        "method_calculation": method_calculation,
        "density_lowerthreshold": density_lowerthreshold,
        "density_upperthreshold": density_upperthreshold,
        "period_calculation": period_calculation,
        "period_calculation_latest": period_calculation_latest,
        "time_factor": time_factor,
        "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
        "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
        "alert_lower_breached": alert_lower_breached,
        "alert_upper_breached": alert_upper_breached,
        "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
        "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
        "static_lower_threshold": static_lower_threshold,
        "static_upper_threshold": static_upper_threshold,
        "period_exclusions": period_exclusions,
        "algorithm": algorithm,
        "boundaries_extraction_macro": boundaries_extraction_macro,
        "fit_extra_parameters": fit_extra_parameters,
        "apply_extra_parameters": apply_extra_parameters,
    }

    logging.debug(
        f'Processing outliers entity="{entity_outlier}", rules_summary="{rules_summary}"'
    )

    #
    # Proceed
    #

    # Set the densityFunction threshold parameters, non numerical values are
    # handled as invalid values and lead to the factory value too
    try:
        thresholds_are_valid = (
            float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0
        )
    except (TypeError, ValueError):
        thresholds_are_valid = False

    if thresholds_are_valid:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        logging.error(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", '
            "densityFunction threshold parameters are incorrect for this entity, "
            "lower_threshold and upper_threshold must both be a positive value, "
            "will be using factory value."
        )

    # Construct the where NOT conditions, and also verifies if the period_exclusions are valid
    where_conditions = ""
    if period_exclusions:

        # period_calculation is a time relative expression to now, such as -30d for the past 30 days
        # from now, convert it once to the epoch time of the beginning of the calculation period
        period_calculation_seconds = _period_calculation_to_seconds(period_calculation)
        if period_calculation_seconds is None:
            period_calculation_timestamp = None
            logging.warning(
                f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", model_id="{entity_outlier}", period_calculation="{period_calculation}" is not a relative time expression that can be interpreted, period exclusions will all be accepted'
            )
        else:
            period_calculation_timestamp = int(time.time()) - period_calculation_seconds

        # build the list of accepted periods rather than removing items from the
        # list while iterating over it, which would skip the item following each removal
        accepted_period_exclusions = []

        for period in period_exclusions:
            logging.debug(f"period_exclusion: {period}")

            # get the period_latest
            period_latest = period["latest"]

            # if the period ended before the beginning of the calculation period, then we need to skip this period_exclusion
            if (
                period_calculation_timestamp is not None
                and period_latest < period_calculation_timestamp
            ):
                logging.info(
                    f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} rejecting period exclusion as it is now out of the model period calculation: {json.dumps(period, indent=4)}"
                )
                continue

            logging.info(
                f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} accepting period exclusion: {json.dumps(period, indent=4)}"
            )
            accepted_period_exclusions.append(period)
            where_conditions += f'``` period_exclusions for this ML model: ```\n| where NOT (_time>{period["earliest"]} AND _time<{period["latest"]})\n'

        # if any period was rejected, update the list in place and the entity_outlier_dict
        if len(accepted_period_exclusions) != len(period_exclusions):
            period_exclusions[:] = accepted_period_exclusions
            entity_outlier_dict["period_exclusions"] = period_exclusions

    else:
        where_conditions = "``` no period_exclusions for this ML model ```"

    # set the lookup name
    if mode == "live":
        ml_model_lookup_name = f"__mlspl_{entity_outlier}.mlmodel"
        ml_model_lookup_shortname = f"{entity_outlier}"
    else:
        ml_model_lookup_name = f"__mlspl_simulation_{entity_outlier}.mlmodel"
        ml_model_lookup_shortname = f"simulation_{entity_outlier}"

    # location of the model file on the file system
    ml_model_path = os.path.join(
        splunkhome,
        "etc",
        "users",
        "splunk-system-user",
        "trackme",
        "lookups",
        ml_model_lookup_name,
    )

    #
    # Delete current ML model
    #

    # if the current ml model exists, then we need to delete it
    if os.path.exists(ml_model_path):

        # Attempt to delete the current ml model
        rest_url = f"{splunkd_uri}/servicesNS/splunk-system-user/trackme/data/lookup-table-files/{ml_model_lookup_name}"

        logging.info(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", attempting to delete Machine Learning lookup_name="{ml_model_lookup_name}"'
        )
        try:
            response = requests.delete(
                rest_url,
                headers=header,
                verify=False,
                timeout=300,
            )
            if response.status_code not in (200, 201, 204):
                logging.warning(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}", this might be expected if the model does not exist yet or has been deleted manually, url="{rest_url}", response.status_code="{response.status_code}", response.text="{response.text}"'
                )
            else:
                logging.info(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", action="success", deleted lookup_name="{ml_model_lookup_name}" successfully'
                )

                # Update ml_model_filesize / ml_model_lookup_share
                if mode == "live":
                    entity_outlier_dict["ml_model_filesize"] = "pending"
                    entity_outlier_dict["ml_model_lookup_share"] = "pending"
                else:
                    entity_outlier_dict["ml_model_simulation_filesize"] = "pending"
                    entity_outlier_dict["ml_model_simulation_lookup_share"] = "pending"

        except Exception as e:
            # best effort: a failure to delete the model does not prevent the training
            logging.error(
                f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}" with exception="{str(e)}"'
            )

    #
    # Set and run the Machine Learning model training search
    #

    # the object value is escaped the same way in all searches
    escaped_object = escape_backslash(object_value)

    # define the fit command, the model is split by factor if time_factor is not set to none
    fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} into {ml_model_lookup_shortname}"
    if time_factor != "none":
        fit_command += " by factor"

    # if any, add extra parameters to the fit command
    if fit_extra_parameters:
        fit_command += f" {fit_extra_parameters}"

    # define the gen search, handle the search depending on if time_factor is set to none or not
    if time_factor == "none":
        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escaped_object}" by object span="{kpi_span}"
            {where_conditions}
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

    else:
        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escaped_object}" by object span="{kpi_span}"
            {where_conditions}
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

    # log debug
    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    # define the apply command
    apply_command = f"apply {ml_model_lookup_shortname}"

    # if any, add extra parameters to the apply command
    if apply_extra_parameters:
        apply_command += f" {apply_extra_parameters}"

    # define the render search depending on if time_factor is set to none or not, to be stored for further usage purposes
    if time_factor == "none":
        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}"
            tenant_id="{tenant_id}" object_category="splk-{component}" object="{escaped_object}" by object span="{kpi_span}"
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
        """
        )

    else:
        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}"
            tenant_id="{tenant_id}" object_category="splk-{component}" object="{escaped_object}" by object span="{kpi_span}"
            | eval factor=strftime(_time, "{time_factor}")
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
        """
        )

    # set kwargs
    kwargs_oneshot = {
        "earliest_time": str(period_calculation),
        "latest_time": str(period_calculation_latest),
        "output_mode": "json",
        "count": 0,
    }

    #
    # Run
    #

    # track the search runtime
    start = time.time()

    # proceed
    try:
        reader = run_splunk_search(
            service,
            ml_model_gen_search,
            kwargs_oneshot,
            24,
            5,
        )

        for item in reader:
            if isinstance(item, dict):
                # log
                logging.info(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search executed successfully, run_time="{round(time.time() - start, 3)}", results="{json.dumps(item, indent=0)}"'
                )

            # Update ml_model_lookup_share, owner and perms: only set once the search has returned
            # NOTE(review): the same keys are updated in both modes, while the delete step uses
            # ml_model_simulation_* keys in simulation mode - confirm this is intended
            entity_outlier_dict["ml_model_lookup_share"] = "private"
            entity_outlier_dict["ml_model_lookup_owner"] = "splunk-system-user"

    except Exception as e:
        msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search failed with exception="{str(e)}", run_time="{str(time.time() - start)}"'
        logging.error(msg)
        raise Exception(msg) from e

    #
    # Update the record
    #

    # values which are common to both modes
    rules_access_search = f'| inputlookup trackme_{component}_outliers_entity_rules_tenant_{tenant_id} where _key="{key_value}"'
    ml_model_summary_search = f"| summary {entity_outlier}"

    if mode == "live":

        # Update last_exec
        entity_outlier_dict["last_exec"] = str(time.time())

        # Update ml_model_gen_search
        entity_outlier_dict["ml_model_gen_search"] = ml_model_gen_search

        # Update ml_model_render_search
        entity_outlier_dict["ml_model_render_search"] = ml_model_render_search

        # Update rules_access_search
        entity_outlier_dict["rules_access_search"] = rules_access_search

        # Update ml_model_filename
        entity_outlier_dict["ml_model_filename"] = ml_model_lookup_name

        # Update ml_model_summary_search
        entity_outlier_dict["ml_model_summary_search"] = ml_model_summary_search

        # the filesize is stored in ml_model_filesize
        filesize_key = "ml_model_filesize"

    else:

        # Update last_exec
        entity_outlier_dict["ml_model_simulation_last_exec"] = str(time.time())

        # Update ml_model_gen_search
        entity_outlier_dict["ml_model_simulation_gen_search"] = ml_model_gen_search

        # Update ml_model_render_search
        entity_outlier_dict["ml_model_simulation_render_search"] = (
            ml_model_render_search
        )

        # Update rules_access_search
        entity_outlier_dict["ml_model_simulation_rules_access_search"] = (
            rules_access_search
        )

        # Update ml_model_filename
        entity_outlier_dict["ml_model_simulation_filename"] = ml_model_lookup_name

        # Update ml_model_summary_search
        entity_outlier_dict["ml_model_simulation_summary_search"] = (
            ml_model_summary_search
        )

        # the filesize is stored in ml_model_simulation_filesize
        filesize_key = "ml_model_simulation_filesize"

    # Update the model filesize, the file may not be available yet
    try:
        entity_outlier_dict[filesize_key] = os.path.getsize(ml_model_path)
    except Exception as e:
        logging.info(
            f'tenant_id="{tenant_id}", size of the ML lookup_name="{ml_model_lookup_name}" cannot be determined yet as the model may not be ready, response="{str(e)}"'
        )
        entity_outlier_dict[filesize_key] = "pending"

    # Update the main dict
    entities_outliers[entity_outlier] = entity_outlier_dict

    #
    # End
    #

    # finally, return entities_outliers
    return entities_outliers, entity_outlier, entity_outlier_dict


def return_lightsimulation_search(
    tenant_id, component, object_value, metric_idx, model_json_def
):
    """Build and return the SPL search simulating a model, without saving the model.

    The search runs ``mstats`` against the metric index, applies the ``fit``
    command without an ``into`` clause (split ``by factor`` when ``time_factor``
    is not ``none``) and extracts the boundaries with the configured macro.
    The search is only generated, it is not executed by this function.

    Args:
        tenant_id: tenant identifier, used in the search and log messages.
        component: component suffix, used in ``object_category="splk-<component>"``.
        object_value: value of the ``object`` filter (backslashes are escaped).
        metric_idx: metric index searched by ``mstats``.
        model_json_def: model definition, as a dict or as a JSON string.

    Returns:
        The search, as returned by ``remove_leading_spaces``.

    Raises:
        Exception: if ``model_json_def`` is not a dict and cannot be loaded as JSON.
        KeyError: if a mandatory setting is missing from ``model_json_def``.
    """

    # log debug
    logging.debug("mode is simulation")

    # log debug
    logging.debug(f"model_json_def={model_json_def}")

    # load the model definition as a dict
    if not isinstance(model_json_def, dict):
        try:
            model_json_def = json.loads(model_json_def)
            # log debug
            logging.debug(
                f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
            )
        except Exception as e:
            msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"'
            logging.error(msg)
            raise Exception(msg) from e

    # get definitions from the model_json_def, mandatory settings are accessed
    # directly so that a missing setting is reported with a KeyError
    kpi_metric = model_json_def["kpi_metric"]
    kpi_span = model_json_def["kpi_span"]
    method_calculation = model_json_def["method_calculation"]
    density_lowerthreshold = model_json_def["density_lowerthreshold"]
    density_upperthreshold = model_json_def["density_upperthreshold"]
    alert_lower_breached = model_json_def["alert_lower_breached"]
    alert_upper_breached = model_json_def["alert_upper_breached"]
    period_calculation = model_json_def["period_calculation"]
    # optional period_calculation_latest
    period_calculation_latest = model_json_def.get("period_calculation_latest", "now")
    time_factor = model_json_def["time_factor"]
    perc_min_lowerbound_deviation = model_json_def["perc_min_lowerbound_deviation"]
    perc_min_upperbound_deviation = model_json_def["perc_min_upperbound_deviation"]
    min_value_for_lowerbound_breached = model_json_def.get(
        "min_value_for_lowerbound_breached", 0
    )
    min_value_for_upperbound_breached = model_json_def.get(
        "min_value_for_upperbound_breached", 0
    )
    static_lower_threshold = model_json_def.get("static_lower_threshold", None)
    static_upper_threshold = model_json_def.get("static_upper_threshold", None)
    algorithm = model_json_def.get("algorithm", "DensityFunction")
    boundaries_extraction_macro = model_json_def.get(
        "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
    )
    fit_extra_parameters = model_json_def.get("fit_extra_parameters", None)
    apply_extra_parameters = model_json_def.get("apply_extra_parameters", None)

    # summary of the effective settings, for debugging purposes only
    rules_summary = {
        "kpi_metric": kpi_metric,
        "kpi_span": kpi_span,
        "method_calculation": method_calculation,
        "density_lowerthreshold": density_lowerthreshold,
        "density_upperthreshold": density_upperthreshold,
        "period_calculation": period_calculation,
        "period_calculation_latest": period_calculation_latest,
        "time_factor": time_factor,
        "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
        "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
        "alert_lower_breached": alert_lower_breached,
        "alert_upper_breached": alert_upper_breached,
        "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
        "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
        "static_lower_threshold": static_lower_threshold,
        "static_upper_threshold": static_upper_threshold,
        "algorithm": algorithm,
        "boundaries_extraction_macro": boundaries_extraction_macro,
        "fit_extra_parameters": fit_extra_parameters,
        "apply_extra_parameters": apply_extra_parameters,
    }

    logging.debug(f'Processing outliers simulation rules_summary="{rules_summary}"')

    #
    # Proceed
    #

    # Set the densityFunction threshold parameters, non numerical values are
    # handled as invalid values and lead to the factory value too
    try:
        thresholds_are_valid = (
            float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0
        )
    except (TypeError, ValueError):
        thresholds_are_valid = False

    if thresholds_are_valid:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        logging.error(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", '
            "densityFunction threshold parameters are incorrect for this entity, "
            "lower_threshold and upper_threshold must both be a positive value, "
            "will be using factory value."
        )

    # define the fit command, there is no "into" clause as the simulated model is not saved
    fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str}"
    if time_factor != "none":
        fit_command += " by factor"

    # if any, add extra parameters to the fit command
    if fit_extra_parameters:
        fit_command += f" {fit_extra_parameters}"

    # define the gen search, handle the search depending on if time_factor is set to none or not
    if time_factor == "none":

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )

    else:

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )

    # log debug
    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    return ml_model_gen_search


def train_cim_mlmodel(
    service,
    splunkd_uri,
    session_key,
    username,
    tenant_id,
    component,
    object_value,
    key_value,
    cim_field,
    tenant_trackme_metric_idx,
    mode,
    entities_outliers,
    entity_outlier,
    entity_outlier_dict,
    model_json_def,
):
    """
    Train the Machine Learning outliers model of a CIM entity and store the
    outcome in the entity rules record.

    Steps performed:
    - resolve the model settings (from entity_outlier_dict in live mode, from
      model_json_def in simulation mode)
    - reject the period exclusions which ended before the training period and
      turn the remaining ones into "where NOT" filters
    - delete the current model file through splunkd if it exists on disk
    - run the training search (fit ... into <model>)
    - store the generated searches and the model metadata in
      entity_outlier_dict, then in entities_outliers[entity_outlier]

    Arguments:
        service: Splunk service object handed over to run_splunk_search
        splunkd_uri: splunkd base URI, used to build the REST deletion URL
        session_key: Splunk session key used for the Authorization header
        username: not used by this function
        tenant_id: tenant identifier, used in searches and log messages
        component: component suffix, used in object_category="splk-<component>"
        object_value: object name, used in searches and log messages
        key_value: KVstore key stored in the rules access search
        cim_field: overridden by the "cim_field" value of the model settings
        tenant_trackme_metric_idx: metric index queried by mstats
        mode: "live" or "simulation"
        entities_outliers: dict of entities, updated for entity_outlier
        entity_outlier: model identifier, used to name the model file
        entity_outlier_dict: rules record of the model, updated in place
        model_json_def: simulation settings, as a JSON string or a dict
            (only read in simulation mode)

    Returns:
        tuple: (entities_outliers, entity_outlier, entity_outlier_dict)

    Raises:
        ValueError: mode is not supported, a density threshold is not numeric,
            or period_calculation cannot be parsed while period exclusions
            are defined
        KeyError: a mandatory setting is missing from model_json_def
            (simulation mode)
        Exception: mandatory settings cannot be extracted (live mode),
            model_json_def cannot be loaded, or the training search failed
    """

    logging.debug("starting function train_cim_mlmodel")

    # fail early with an explicit message, an unknown mode would otherwise
    # surface later as an unbound local variable
    if mode not in ("live", "simulation"):
        msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", entity_outlier="{entity_outlier}", unsupported mode="{mode}", valid options are: live | simulation'
        logging.error(msg)
        raise ValueError(msg)

    # Define an header for requests authenticated communications with splunkd
    header = {
        "Authorization": f"Splunk {session_key}",
        "Content-Type": "application/json",
    }

    #
    # Resolve the model settings
    #

    if mode == "live":
        try:
            rules_summary = _collect_cim_model_settings(
                entity_outlier_dict, entity_outlier_dict
            )
            logging.debug(
                f'Processing outliers entity="{entity_outlier}", rules_summary="{rules_summary}"'
            )

        except Exception as e:
            msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", entity_outlier="{entity_outlier}", failed to extract one or more expected settings from the entity, is this record corrupted? Exception="{str(e)}"'
            logging.error(msg)
            raise Exception(msg) from e

    else:

        # log debug
        logging.debug("mode is simulation")

        # log debug
        logging.debug(f"model_json_def={model_json_def}")

        # load the model definition as a dict, unless it was already provided as a dict
        if not isinstance(model_json_def, dict):
            try:
                model_json_def = json.loads(model_json_def)
                # log debug
                logging.debug(
                    f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
                )
            except Exception as e:
                msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"'
                logging.error(msg)
                raise Exception(msg) from e

        # the period exclusions, the algorithm, the boundaries macro and the
        # fit/apply extra parameters are read from the model KVstore record
        rules_summary = _collect_cim_model_settings(
            model_json_def, entity_outlier_dict
        )

        logging.debug(
            f'Processing outliers entity="{entity_outlier}", rules_summary="{rules_summary}"'
        )

    # settings required by the rest of the function
    cim_breakby = rules_summary["cim_breakby"]
    entity_name = rules_summary["entity_name"]
    cim_field = rules_summary["cim_field"]
    kpi_metric = rules_summary["kpi_metric"]
    kpi_span = rules_summary["kpi_span"]
    method_calculation = rules_summary["method_calculation"]
    density_lowerthreshold = rules_summary["density_lowerthreshold"]
    density_upperthreshold = rules_summary["density_upperthreshold"]
    period_calculation = rules_summary["period_calculation"]
    period_calculation_latest = rules_summary["period_calculation_latest"]
    time_factor = rules_summary["time_factor"]
    period_exclusions = rules_summary["period_exclusions"]
    algorithm = rules_summary["algorithm"]
    boundaries_extraction_macro = rules_summary["boundaries_extraction_macro"]
    fit_extra_parameters = rules_summary["fit_extra_parameters"]
    apply_extra_parameters = rules_summary["apply_extra_parameters"]

    # keys of the entity record receiving the results, these depend on the mode
    field_names = _cim_result_field_names(mode)

    #
    # Proceed
    #

    # Set the densityFunction threshold parameters
    if float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        error_msg = """\
            "densityFunction threshold parameters are incorrects for this entity,
            lower_threshold and upper_threshold must both be a positive value,
            will be using using factory value.
            """
        logging.error(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", {error_msg}'
        )

    # Construct the where NOT conditions, and also verifies if the period_exclusions are valid
    where_conditions = ""
    if period_exclusions:

        # period_calculation is a time relative expression to now, such as -30d
        # for the past 30 days from now, convert it once to an epoch timestamp
        period_calculation_timestamp = int(time.time()) - _period_calculation_seconds(
            period_calculation
        )

        # exclusions are collected in a new list, removing items from the list
        # being iterated would skip the item following each rejected exclusion
        retained_exclusions = []

        for period in period_exclusions:
            logging.debug(f"period_exclusion: {period}")

            # an exclusion which ended before the training period is of no use anymore
            if period["latest"] < period_calculation_timestamp:
                logging.info(
                    f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} rejecting period exclusion as it is now out of the model period calculation: {json.dumps(period, indent=4)}"
                )
                continue

            logging.info(
                f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} accepting period exclusion: {json.dumps(period, indent=4)}"
            )
            retained_exclusions.append(period)
            where_conditions += f'``` period_exclusions for this ML model: ```\n| where NOT (_time>{period["earliest"]} AND _time<{period["latest"]})\n'

        # update the entity_outlier_dict if any exclusion was rejected, the
        # list is updated in place as it is the list held by the record
        if len(retained_exclusions) != len(period_exclusions):
            period_exclusions[:] = retained_exclusions
            entity_outlier_dict["period_exclusions"] = period_exclusions

    else:
        where_conditions = "``` no period_exclusions for this ML model ```"

    # set the lookup name
    if mode == "live":
        ml_model_lookup_name = f"__mlspl_{entity_outlier}.mlmodel"
        ml_model_lookup_shortname = f"{entity_outlier}"
    else:
        ml_model_lookup_name = f"__mlspl_simulation_{entity_outlier}.mlmodel"
        ml_model_lookup_shortname = f"simulation_{entity_outlier}"

    # location of the model file on the file system
    ml_model_file = _ml_model_file_path(ml_model_lookup_name)

    #
    # Delete current ML model
    #

    # if the current ml model exists, then we need to delete it
    if os.path.exists(ml_model_file):

        # Attempt to delete the current ml model
        rest_url = f"{splunkd_uri}/servicesNS/splunk-system-user/trackme/data/lookup-table-files/{ml_model_lookup_name}"

        logging.info(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", attempting to delete Machine Learning lookup_name="{ml_model_lookup_name}"'
        )
        try:
            response = requests.delete(
                rest_url,
                headers=header,
                verify=False,
                timeout=300,
            )
            if response.status_code not in (200, 201, 204):
                logging.warning(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}", this might be expected if the model does not exist yet or has been deleted manually, url="{rest_url}", response.status_code="{response.status_code}", response.text="{response.text}"'
                )
            else:
                logging.info(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", action="success", deleted lookup_name="{ml_model_lookup_name}" successfully'
                )

                # Update ml_model_filesize / ml_model_lookup_share
                entity_outlier_dict[field_names["filesize"]] = "pending"
                entity_outlier_dict[field_names["lookup_share"]] = "pending"

        except Exception as e:
            # best effort, a failure to delete the model does not prevent the training
            logging.error(
                f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}" with exception="{str(e)}"'
            )

    #
    # Set and run the Machine Learning model training search
    #

    # set the cim_entity_filter, if cim_breakby set to "none", equals to entity="global"
    if cim_breakby == "none":
        cim_entity_filter = 'cim_entity_zone="global"'
    else:
        cim_entity_filter = f"{cim_breakby}={entity_name}"

    # fit command, split by factor when a time factor is set
    fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} into {ml_model_lookup_shortname}"
    if time_factor != "none":
        fit_command += " by factor"

    # if any, add extra parameters to the fit command
    if fit_extra_parameters:
        fit_command += f" {fit_extra_parameters}"

    # apply command, to be stored in the render search
    apply_command = f"apply {ml_model_lookup_shortname}"

    # if any, add extra parameters to the apply command
    if apply_extra_parameters:
        apply_command += f" {apply_extra_parameters}"

    # define the gen search and the render search (stored for further usage purposes),
    # handle the searches depending on if time_factor is set to none or not
    if time_factor == "none":

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" {cim_entity_filter} cim_field="{cim_field}" by object span="{kpi_span}"
            {where_conditions}
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}"
            tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" {cim_entity_filter} cim_field="{cim_field}" by object span="{kpi_span}"
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
        """
        )

    else:

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" {cim_entity_filter} cim_field="{cim_field}" by object span="{kpi_span}"
            {where_conditions}
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}"
            tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" {cim_entity_filter} cim_field="{cim_field}" by object span="{kpi_span}"
            | eval factor=strftime(_time, "{time_factor}")
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
        """
        )

    # log debug
    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    # set kwargs
    kwargs_oneshot = {
        "earliest_time": str(period_calculation),
        "latest_time": str(period_calculation_latest),
        "output_mode": "json",
        "count": 0,
    }

    #
    # Run
    #

    # track the search runtime
    start = time.time()

    # proceed
    try:
        # the two trailing arguments are passed as-is to run_splunk_search
        # (presumably retry count and wait time - TODO confirm against trackme_libs)
        reader = run_splunk_search(
            service,
            ml_model_gen_search,
            kwargs_oneshot,
            24,
            5,
        )

        for item in reader:
            if isinstance(item, dict):
                # log
                logging.info(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search executed successfully, run_time="{round(time.time() - start, 3)}", results="{json.dumps(item, indent=0)}"'
                )

            # NOTE(review): these two keys are written in both modes, the
            # simulation mode may have been expected to use its own keys - confirm

            # Update ml_model_lookup_share
            entity_outlier_dict["ml_model_lookup_share"] = "private"

            # Update owner and perms
            entity_outlier_dict["ml_model_lookup_owner"] = "splunk-system-user"

    except Exception as e:
        msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search failed with exception="{str(e)}", run_time="{str(time.time() - start)}"'
        logging.error(msg)
        raise Exception(msg) from e

    #
    # Store the results in the entity record
    #

    # Update last_exec
    entity_outlier_dict[field_names["last_exec"]] = str(time.time())

    # Update ml_model_gen_search
    entity_outlier_dict[field_names["gen_search"]] = ml_model_gen_search

    # Update ml_model_render_search
    entity_outlier_dict[field_names["render_search"]] = ml_model_render_search

    # Update rules_access_search
    entity_outlier_dict[field_names["rules_access_search"]] = (
        f'| inputlookup trackme_{component}_outliers_entity_rules_tenant_{tenant_id} where _key="{key_value}"'
    )

    # Update ml_model_filename
    entity_outlier_dict[field_names["filename"]] = ml_model_lookup_name

    # Update ml_model_summary_search
    # NOTE(review): in simulation mode the model is trained into
    # simulation_<entity_outlier> while this search refers to <entity_outlier> - confirm
    entity_outlier_dict[field_names["summary_search"]] = f"| summary {entity_outlier}"

    # Update ml_model_filesize
    try:
        entity_outlier_dict[field_names["filesize"]] = os.path.getsize(ml_model_file)

    except Exception as e:
        logging.info(
            f'tenant_id="{tenant_id}", size of the ML lookup_name="{ml_model_lookup_name}" cannot be determined yet as the model may not be ready, response="{str(e)}"'
        )
        entity_outlier_dict[field_names["filesize"]] = "pending"

    # Update the main dict
    entities_outliers[entity_outlier] = entity_outlier_dict

    #
    # End
    #

    # finally, return entities_outliers
    return entities_outliers, entity_outlier, entity_outlier_dict


def _collect_cim_model_settings(settings, record):
    """
    Return the rules summary dict of a CIM model.

    Mandatory and threshold related settings are read from settings, the
    period exclusions, algorithm, boundaries extraction macro and fit/apply
    extra parameters are read from record (the model KVstore record).
    A missing mandatory setting raises KeyError.
    """

    # mandatory settings, read first so that a missing key is reported before anything else
    mandatory_keys = (
        "is_disabled",
        "cim_breakby",
        "entity",
        "cim_field",
        "kpi_metric",
        "kpi_span",
        "method_calculation",
        "density_lowerthreshold",
        "density_upperthreshold",
        "alert_lower_breached",
        "alert_upper_breached",
        "period_calculation",
        "time_factor",
        "perc_min_lowerbound_deviation",
        "perc_min_upperbound_deviation",
    )
    mandatory = {key: settings[key] for key in mandatory_keys}

    # ensure period_exclusions is a list, otherwise set it to an empty list
    period_exclusions = record.get("period_exclusions", [])
    if not isinstance(period_exclusions, list):
        period_exclusions = []

    return {
        "is_disabled": mandatory["is_disabled"],
        "cim_breakby": mandatory["cim_breakby"],
        "entity_name": mandatory["entity"],
        "cim_field": mandatory["cim_field"],
        "kpi_metric": mandatory["kpi_metric"],
        "kpi_span": mandatory["kpi_span"],
        "method_calculation": mandatory["method_calculation"],
        "density_lowerthreshold": mandatory["density_lowerthreshold"],
        "density_upperthreshold": mandatory["density_upperthreshold"],
        "period_calculation": mandatory["period_calculation"],
        "period_calculation_latest": settings.get("period_calculation_latest", "now"),
        "time_factor": mandatory["time_factor"],
        "perc_min_lowerbound_deviation": mandatory["perc_min_lowerbound_deviation"],
        "perc_min_upperbound_deviation": mandatory["perc_min_upperbound_deviation"],
        "alert_lower_breached": mandatory["alert_lower_breached"],
        "alert_upper_breached": mandatory["alert_upper_breached"],
        "min_value_for_lowerbound_breached": settings.get(
            "min_value_for_lowerbound_breached", 0
        ),
        "min_value_for_upperbound_breached": settings.get(
            "min_value_for_upperbound_breached", 0
        ),
        "static_lower_threshold": settings.get("static_lower_threshold", None),
        "static_upper_threshold": settings.get("static_upper_threshold", None),
        "period_exclusions": period_exclusions,
        "algorithm": record.get("algorithm", "DensityFunction"),
        "boundaries_extraction_macro": record.get(
            "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
        ),
        "fit_extra_parameters": record.get("fit_extra_parameters", None),
        "apply_extra_parameters": record.get("apply_extra_parameters", None),
    }


def _period_calculation_seconds(period_calculation):
    """
    Convert a relative period expressed in days (such as -30d) into seconds.

    All leading digits following the sign are taken into account, so -7d and
    -100d are handled as well as two digits periods. The value is always
    interpreted as a number of days, whatever the unit suffix is.
    Raises ValueError if the expression does not start with a number.
    """

    expression = str(period_calculation).strip().lstrip("+-")

    digits = ""
    for char in expression:
        if char not in "0123456789":
            break
        digits += char

    if not digits:
        raise ValueError(
            f'period_calculation="{period_calculation}" is not a valid relative period, expected a value such as -30d'
        )

    return int(digits) * 86400


def _ml_model_file_path(ml_model_lookup_name):
    """Return the path of a model file in the trackme lookups of splunk-system-user."""

    return os.path.join(
        splunkhome,
        "etc",
        "users",
        "splunk-system-user",
        "trackme",
        "lookups",
        ml_model_lookup_name,
    )


def _cim_result_field_names(mode):
    """Return the keys of the entity record receiving the training results for this mode."""

    if mode == "live":
        return {
            "last_exec": "last_exec",
            "gen_search": "ml_model_gen_search",
            "render_search": "ml_model_render_search",
            "rules_access_search": "rules_access_search",
            "filename": "ml_model_filename",
            "summary_search": "ml_model_summary_search",
            "filesize": "ml_model_filesize",
            "lookup_share": "ml_model_lookup_share",
        }

    return {
        "last_exec": "ml_model_simulation_last_exec",
        "gen_search": "ml_model_simulation_gen_search",
        "render_search": "ml_model_simulation_render_search",
        "rules_access_search": "ml_model_simulation_rules_access_search",
        "filename": "ml_model_simulation_filename",
        "summary_search": "ml_model_simulation_summary_search",
        "filesize": "ml_model_simulation_filesize",
        "lookup_share": "ml_model_simulation_lookup_share",
    }


def return_cim_lightsimulation_search(
    tenant_id, component, object_value, metric_idx, model_json_def
):
    """
    Build the ML model generation (fit) search used for an outliers simulation
    on a CIM entity.

    Args:
        tenant_id: tenant identifier, used as a filter in the mstats search.
        component: component suffix, the search filters on
            object_category="splk-<component>".
        object_value: object name, backslashes are escaped before it is
            injected in the search.
        metric_idx: name of the metric index queried by mstats.
        model_json_def: the model definition, either a dict or a JSON string.
            The following keys are mandatory: kpi_metric, kpi_span, entity,
            cim_field, method_calculation, density_lowerthreshold,
            density_upperthreshold, alert_lower_breached, alert_upper_breached,
            period_calculation, time_factor, perc_min_lowerbound_deviation,
            perc_min_upperbound_deviation. Other keys are optional and
            receive a default value.

    Returns:
        The search string, as returned by remove_leading_spaces.

    Raises:
        Exception: if model_json_def is not a dict and cannot be loaded as JSON.
        KeyError: if a mandatory key is missing from the model definition.
    """

    # log debug
    logging.debug("mode is simulation")

    # log debug
    logging.debug(f"model_json_def={model_json_def}")

    # load the model definition as a dict
    if not isinstance(model_json_def, dict):
        try:
            model_json_def = json.loads(model_json_def)
        except Exception as e:
            msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"'
            logging.error(msg)
            # keep the generic Exception type for callers, but preserve the cause
            raise Exception(msg) from e
        else:
            # log debug
            logging.debug(
                f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
            )

    # get definitions from the model_json_def (mandatory keys raise KeyError if missing)
    kpi_metric = model_json_def["kpi_metric"]
    kpi_span = model_json_def["kpi_span"]
    entity_name = model_json_def["entity"]
    cim_field = model_json_def["cim_field"]
    method_calculation = model_json_def["method_calculation"]
    density_lowerthreshold = model_json_def["density_lowerthreshold"]
    density_upperthreshold = model_json_def["density_upperthreshold"]
    alert_lower_breached = model_json_def["alert_lower_breached"]
    alert_upper_breached = model_json_def["alert_upper_breached"]
    period_calculation = model_json_def["period_calculation"]
    # optional period_calculation_latest
    period_calculation_latest = model_json_def.get("period_calculation_latest", "now")
    time_factor = model_json_def["time_factor"]
    perc_min_lowerbound_deviation = model_json_def["perc_min_lowerbound_deviation"]
    perc_min_upperbound_deviation = model_json_def["perc_min_upperbound_deviation"]

    # optional settings and their default values
    min_value_for_lowerbound_breached = model_json_def.get(
        "min_value_for_lowerbound_breached", 0
    )
    min_value_for_upperbound_breached = model_json_def.get(
        "min_value_for_upperbound_breached", 0
    )
    static_lower_threshold = model_json_def.get("static_lower_threshold", None)
    static_upper_threshold = model_json_def.get("static_upper_threshold", None)

    # get the algorithm
    algorithm = model_json_def.get("algorithm", "DensityFunction")

    # get the boundaries_extraction_macro
    boundaries_extraction_macro = model_json_def.get(
        "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
    )

    # optional extra parameters for the fit command
    fit_extra_parameters = model_json_def.get("fit_extra_parameters", None)

    # optional extra parameters for the apply command
    # (not used by the gen search, only reported in the rules summary)
    apply_extra_parameters = model_json_def.get("apply_extra_parameters", None)

    rules_summary = {
        "kpi_metric": kpi_metric,
        "kpi_span": kpi_span,
        "entity_name": entity_name,
        "cim_field": cim_field,
        "method_calculation": method_calculation,
        "density_lowerthreshold": density_lowerthreshold,
        "density_upperthreshold": density_upperthreshold,
        "period_calculation": period_calculation,
        "period_calculation_latest": period_calculation_latest,
        "time_factor": time_factor,
        "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
        "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
        "alert_lower_breached": alert_lower_breached,
        "alert_upper_breached": alert_upper_breached,
        "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
        "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
        "static_lower_threshold": static_lower_threshold,
        "static_upper_threshold": static_upper_threshold,
        "algorithm": algorithm,
        "boundaries_extraction_macro": boundaries_extraction_macro,
        "fit_extra_parameters": fit_extra_parameters,
        "apply_extra_parameters": apply_extra_parameters,
    }

    logging.debug(f'Processing outliers simulation rules_summary="{rules_summary}"')

    #
    # Proceed
    #

    # Set the densityFunction threshold parameters, both must be strictly positive
    # numbers; non numeric values are handled as invalid rather than raising
    try:
        thresholds_are_valid = (
            float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0
        )
    except (TypeError, ValueError):
        thresholds_are_valid = False

    if thresholds_are_valid:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        # fall back to the factory values and report the faulty definition
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        logging.error(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", '
            f"densityFunction threshold parameters are incorrect for this entity, "
            f"lower_threshold and upper_threshold must both be positive values, "
            f"will be using factory values, "
            f'density_lowerthreshold="{density_lowerthreshold}", '
            f'density_upperthreshold="{density_upperthreshold}"'
        )

    # set the cim_entity_filter, if cim_breakby set to "none", equals to entity="global"
    if entity_name == "global":
        cim_entity_filter = 'cim_entity_zone="global"'
    else:
        cim_entity_filter = f"cim_entity_zone={entity_name}"

    # the model is fitted per time factor unless time_factor is set to none
    use_time_factor = time_factor != "none"

    # define the fit command
    fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str}"
    if use_time_factor:
        fit_command += " by factor"

    # if any, add extra parameters to the fit command
    if fit_extra_parameters:
        fit_command += f" {fit_extra_parameters}"

    # define the mstats command, common to both forms of the gen search
    mstats_command = (
        f"mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} "
        f'where index="{metric_idx}" tenant_id="{tenant_id}" '
        f'object_category="splk-{component}" '
        f'object="{escape_backslash(object_value)}" '
        f'{cim_entity_filter} cim_field="{cim_field}" '
        f'by object span="{kpi_span}"'
    )

    # define the gen search, handle the search depending on if time_factor is set to none or not
    if use_time_factor:
        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | {mstats_command}
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )
    else:
        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | {mstats_command}
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )

    # log debug
    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    return ml_model_gen_search
