#!/usr/bin/env python
# coding=utf-8

__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2023-2025, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"

import os
import sys
import requests
import json
import hashlib
import random
import time
import logging
from logging.handlers import RotatingFileHandler
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home
splunkhome = os.environ["SPLUNK_HOME"]

# append lib
sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib"))

# import TrackMe libs
from trackme_libs import JSONFormatter

# import Splunk libs
import splunklib.client as client

# logging:
# To avoid overriding the logging destination of callers, this library deliberately does not define
# any logging configuration of its own and relies on the configuration set up by the callers


# Generate metrics for splk-cim entities
def trackme_cim_fields_gen_metrics(
    session_key,
    splunkd_port,
    splunkd_uri,
    tenant_id,
    object_name,
    object_category,
    cim_field,
    state_event,
):
    """Write one CIM field metrics record to the TrackMe CIM metrics log file.

    The target metric index is read from the ``index_settings`` stanza of the
    ``trackme_settings`` configuration, and replaced by the tenant level value
    when the tenant index settings endpoint returns one. The record is then
    emitted through the root logger, whose handlers are temporarily replaced
    by a rotating file handler writing to
    ``$SPLUNK_HOME/var/log/splunk/trackme_cim_metrics.log``. The previous
    handlers are restored and the file handler is closed before returning.

    Args:
        session_key: Splunk session token, used for the SDK connection and
            for the ``Authorization`` header of the REST call.
        splunkd_port: splunkd management port used for the SDK connection.
        splunkd_uri: base splunkd URI used to call the tenant index settings
            endpoint.
        tenant_id: tenant identifier.
        object_name: object name; its SHA-256 hex digest is logged as
            ``object_id``.
        object_category: object category, logged as is.
        cim_field: CIM field name, logged as is.
        state_event: JSON string providing ``breakby_field``,
            ``breakby_entity``, ``status``, ``total_count``,
            ``count_unknown``, ``count_not_unknown``,
            ``pct_coverage_unknown`` and ``pct_coverage_compliant``.

    Raises:
        Exception: any failure is re-raised as a plain ``Exception`` carrying
            the original message (the original exception is chained).
    """
    # These are initialised before the try block so that the finally clause
    # is always safe to run, including when the failure happens before the
    # handlers have been swapped.
    log = logging.getLogger()
    original_handlers = None
    filehandler = None

    try:
        # get service
        service = client.connect(
            owner="nobody",
            app="trackme",
            port=splunkd_port,
            token=session_key,
            timeout=600,
        )

        # Define a header for authenticated requests with splunkd
        header = {
            "Authorization": "Splunk %s" % session_key,
            "Content-Type": "application/json",
        }

        # Get global index configuration
        confs = service.confs["trackme_settings"]
        trackme_metric_idx = None
        for stanza in confs:
            if stanza.name == "index_settings":
                trackme_metric_idx = stanza.content.get("trackme_metric_idx")

        # Get tenant-specific index settings
        tenant_trackme_metric_idx = None
        url = f"{splunkd_uri}/services/trackme/v2/vtenants/tenant_idx_settings"
        data = {"tenant_id": tenant_id, "idx_stanza": "trackme_metric_idx"}

        try:
            response = requests.post(
                url,
                headers=header,
                data=json.dumps(data, indent=1),
                verify=False,
                timeout=600,
            )
            if response.status_code in (200, 201, 204):
                response_data = response.json()
                tenant_trackme_metric_idx = response_data.get("trackme_metric_idx")
        except Exception as e:
            # Best effort: fall back to the global index, but leave a trace
            # of the failure in the caller's log.
            logging.warning(
                f'tenant_id="{tenant_id}", failed to retrieve the tenant metric index settings, falling back to the global index, exception="{str(e)}"'
            )

        if not tenant_trackme_metric_idx:
            tenant_trackme_metric_idx = trackme_metric_idx

        # Load state_event JSON
        state_event = json.loads(state_event)

        # Extract key values
        breakby_field = state_event["breakby_field"]
        breakby_entity = state_event["breakby_entity"]
        object_id = hashlib.sha256(object_name.encode("utf-8")).hexdigest()

        state_event_status = state_event["status"]
        state_event_total_count = state_event["total_count"]
        state_event_count_unknown = state_event["count_unknown"]
        state_event_count_not_unknown = state_event["count_not_unknown"]
        state_event_pct_coverage_unknown = state_event["pct_coverage_unknown"]
        state_event_pct_coverage_compliant = state_event["pct_coverage_compliant"]

        # Convert status to numerical: green=0, red=1, anything else=2
        state_event_status_num = {"green": 0, "red": 1}.get(state_event_status, 2)

        # Set a new log file handler
        filehandler = RotatingFileHandler(
            f"{splunkhome}/var/log/splunk/trackme_cim_metrics.log",
            mode="a",
            maxBytes=100000000,
            backupCount=1,
        )
        filehandler.setFormatter(JSONFormatter())

        # Save the original handlers, then replace all of them so that the
        # metrics record is only written to the metrics log file
        original_handlers = log.handlers[:]
        log.handlers = [filehandler]

        logging.info(
            "Metrics - group=cim_metrics",
            extra={
                "target_index": tenant_trackme_metric_idx,
                "tenant_id": tenant_id,
                "object": object_name,
                "object_id": object_id,
                "object_category": object_category,
                breakby_field: breakby_entity,
                "cim_field": cim_field,
                "status": state_event_status_num,
                "total_count": state_event_total_count,
                "count_unknown": state_event_count_unknown,
                "count_not_unknown": state_event_count_not_unknown,
                "pct_coverage_unknown": state_event_pct_coverage_unknown,
                "pct_coverage_compliant": state_event_pct_coverage_compliant,
            },
        )

    except Exception as e:
        raise Exception(str(e)) from e

    finally:
        # Restore the original handlers, only if they were actually swapped
        if original_handlers is not None:
            log.handlers = original_handlers
        # Release the file descriptor opened for this call
        if filehandler is not None:
            filehandler.close()


# Define outliers rules
def trackme_cim_splk_outliers_set_rule(
    session_key,
    splunkd_port,
    reqinfo,
    tenant_id,
    object_name,
    cim_breakby,
    entity_name,
    cim_field,
):
    """Create the default ML outliers rules for a CIM field when none exist.

    The tenant ``kv_trackme_cim_outliers_entity_rules`` KV store collection is
    searched for a record matching the object, entity and CIM field. When no
    record is found, a new record is inserted with two models: one on
    ``splk.cim.pct_coverage_compliant`` (alerting on lower bound breaches) and
    one on ``splk.cim.count_unknown`` (alerting on upper bound breaches).

    Args:
        session_key: Splunk session token used for the SDK connection.
        splunkd_port: splunkd management port used for the SDK connection.
        reqinfo: request info dict; the defaults are read from
            ``reqinfo["trackme_conf"]["splk_outliers_detection"]``.
        tenant_id: tenant identifier, used to build the collection name.
        object_name: object name stored in the record.
        cim_breakby: break by value stored in each model.
        entity_name: entity name stored in the record and in each model.
        cim_field: CIM field name stored in the record and in each model.

    Returns:
        A dict with ``result``, ``entity``, ``cim_field`` and ``model_ids``
        when a record was created; a dict with ``result``, ``entity``,
        ``cim_field`` and ``results`` when rules exist already; ``None`` when
        the record creation failed (the failure is logged).

    Raises:
        KeyError: if a mandatory default is missing from the configuration.
    """
    # get service
    service = client.connect(
        owner="nobody",
        app="trackme",
        port=splunkd_port,
        token=session_key,
        timeout=600,
    )

    # Get app level config
    splk_outliers_detection = reqinfo["trackme_conf"]["splk_outliers_detection"]

    # Record level default (mandatory)
    splk_outliers_detection_disable_default = splk_outliers_detection[
        "splk_outliers_detection_disable_default"
    ]

    # Model level defaults, resolved up front so that a missing mandatory
    # setting fails before the KV store is touched. Settings accessed with
    # get() are optional.
    model_defaults = {
        "method_calculation": splk_outliers_detection[
            "splk_outliers_calculation_default"
        ],
        "density_lowerthreshold": splk_outliers_detection[
            "splk_outliers_density_lower_threshold_default"
        ],
        "density_upperthreshold": splk_outliers_detection[
            "splk_outliers_density_upper_threshold_default"
        ],
        "period_calculation": splk_outliers_detection[
            "splk_outliers_detection_period_default"
        ],
        "period_calculation_latest": splk_outliers_detection[
            "splk_outliers_detection_period_latest_default"
        ],
        "time_factor": splk_outliers_detection[
            "splk_outliers_detection_timefactor_default"
        ],
        "auto_correct": splk_outliers_detection["splk_outliers_auto_correct"],
        "perc_min_lowerbound_deviation": splk_outliers_detection[
            "splk_outliers_perc_min_lowerbound_deviation_default"
        ],
        "perc_min_upperbound_deviation": splk_outliers_detection[
            "splk_outliers_perc_min_upperbound_deviation_default"
        ],
        "algorithm": splk_outliers_detection.get(
            "splk_outliers_mltk_algorithms_default", "DensityFunction"
        ),
        "extract_boundaries_macro": splk_outliers_detection.get(
            "splk_outliers_boundaries_extraction_macro_default",
            "splk_outliers_extract_boundaries",
        ),
        "fit_extra_parameters": splk_outliers_detection.get(
            "splk_outliers_fit_extra_parameters", None
        ),
        "apply_extra_parameters": splk_outliers_detection.get(
            "splk_outliers_apply_extra_parameters", None
        ),
        "static_lower_threshold": splk_outliers_detection.get(
            "splk_outliers_static_lower_threshold", None
        ),
        "static_upper_threshold": splk_outliers_detection.get(
            "splk_outliers_static_upper_threshold", None
        ),
    }

    # Data collection
    collection_name = (
        "kv_trackme_cim_outliers_entity_rules" + "_tenant_" + str(tenant_id)
    )
    collection = service.kvstore[collection_name]

    # Define the KV query
    query_string = {
        "$and": [
            {
                "object_category": "splk-cim",
                "object": object_name,
                "entity": entity_name,
                "cim_field": cim_field,
            }
        ]
    }

    # Search for the kvrecord, a failing query is handled as "no record"
    try:
        kvrecord = collection.data.query(query=json.dumps(query_string))
    except Exception as e:
        logging.warning(
            f'tenant_id="{tenant_id}", object="{object_name}", entity="{entity_name}", cim_field="{cim_field}", Outliers rules lookup failed with exception="{str(e)}"'
        )
        kvrecord = None

    # Rules exist already, nothing to create
    if kvrecord:
        logging.debug(
            f'tenant_id="{tenant_id}", object="{object_name}", entity="{entity_name}", cim_field="{cim_field}", outliers rules records found, record=\'{json.dumps(kvrecord)}\''
        )
        return {
            "result": "success",
            "entity": entity_name,
            "cim_field": cim_field,
            "results": "ML outliers exists already, please reset first",
        }

    logging.info(
        f'tenant_id="{tenant_id}", object="{object_name}", entity="{entity_name}", cim_field="{cim_field}", there are no outliers rules set for this cim_field yet'
    )

    # Insert a new kvrecord
    try:
        # Build the default models: (kpi_metric, alert_lower_breached, alert_upper_breached)
        new_entity_outliers = {}
        for kpi_metric, alert_lower_breached, alert_upper_breached in (
            ("splk.cim.pct_coverage_compliant", 1, 0),
            ("splk.cim.count_unknown", 0, 1),
        ):
            model_id = "model_" + str(random.getrandbits(48))
            # guard against the (unlikely) case of identical random ids,
            # which would silently drop one of the models
            while model_id in new_entity_outliers:
                model_id = "model_" + str(random.getrandbits(48))
            new_entity_outliers[model_id] = _trackme_cim_outliers_build_model(
                model_defaults,
                cim_breakby,
                entity_name,
                cim_field,
                kpi_metric,
                alert_lower_breached,
                alert_upper_breached,
            )

        new_kvrecord = {
            "object": object_name,
            "object_category": "splk-cim",
            "entity": entity_name,
            "cim_field": cim_field,
            "mtime": str(time.time()),
            "is_disabled": splk_outliers_detection_disable_default,
            "entities_outliers": json.dumps(new_entity_outliers, indent=4),
            "last_exec": "pending",
        }

        collection.data.insert(json.dumps(new_kvrecord))
        logging.info(
            f'tenant_id="{tenant_id}", object="{object_name}", entity="{entity_name}", cim_field="{cim_field}", Outliers rules new record created, record="{json.dumps(new_entity_outliers)}"'
        )

        # return the list of models created
        return {
            "result": "success",
            "entity": entity_name,
            "cim_field": cim_field,
            "model_ids": list(new_entity_outliers),
        }

    except Exception as e:
        logging.error(
            f'tenant_id="{tenant_id}", object="{object_name}", entity="{entity_name}", cim_field="{cim_field}", Outliers rules creation failed with exception="{str(e)}"'
        )
        # The failure is logged and not raised, callers receive None
        return None


def _trackme_cim_outliers_build_model(
    model_defaults,
    cim_breakby,
    entity_name,
    cim_field,
    kpi_metric,
    alert_lower_breached,
    alert_upper_breached,
):
    """Return the definition of one new ML outliers model for a CIM field.

    ``model_defaults`` provides the values shared by all models; the key
    order of the returned dict is the order in which the model is serialized.
    """
    return {
        "is_disabled": 0,
        "cim_breakby": cim_breakby,
        "entity": entity_name,
        "cim_field": cim_field,
        "kpi_metric": kpi_metric,
        "kpi_span": "10m",
        "method_calculation": model_defaults["method_calculation"],
        "density_lowerthreshold": model_defaults["density_lowerthreshold"],
        "density_upperthreshold": model_defaults["density_upperthreshold"],
        "alert_lower_breached": alert_lower_breached,
        "alert_upper_breached": alert_upper_breached,
        "period_calculation": model_defaults["period_calculation"],
        "period_calculation_latest": model_defaults["period_calculation_latest"],
        "time_factor": model_defaults["time_factor"],
        "auto_correct": model_defaults["auto_correct"],
        "perc_min_lowerbound_deviation": model_defaults[
            "perc_min_lowerbound_deviation"
        ],
        "perc_min_upperbound_deviation": model_defaults[
            "perc_min_upperbound_deviation"
        ],
        "period_exclusions": [],
        "algorithm": model_defaults["algorithm"],
        "extract_boundaries_macro": model_defaults["extract_boundaries_macro"],
        "fit_extra_parameters": model_defaults["fit_extra_parameters"],
        "apply_extra_parameters": model_defaults["apply_extra_parameters"],
        "static_lower_threshold": model_defaults["static_lower_threshold"],
        "static_upper_threshold": model_defaults["static_upper_threshold"],
        # the fields below are created as placeholders
        "ml_model_gen_search": "pending",
        "ml_model_render_search": "pending",
        "ml_model_summary_search": "pending",
        "rules_access_search": "pending",
        "ml_model_filename": "pending",
        "ml_model_filesize": "pending",
        "ml_model_lookup_share": "pending",
        "ml_model_lookup_owner": "pending",
        "last_exec": "pending",
    }


# Get outliers status
def trackme_cim_splk_outliers_get_status(
    session_key, splunkd_port, tenant_id, object_name
):
    """Summarize the ML outliers status of a splk-cim object.

    All records of the tenant ``kv_trackme_cim_outliers_entity_data`` KV store
    collection matching the object are inspected. Each record describes one
    CIM field of one entity, with its ``isOutlier`` flag and a JSON encoded
    ``models_summary``.

    Args:
        session_key: Splunk session token used for the SDK connection.
        splunkd_port: splunkd management port used for the SDK connection.
        tenant_id: tenant identifier, used to build the collection name.
        object_name: object name to look up.

    Returns:
        When no record is found (or the query fails), a dict with
        ``outlierStatus`` set to ``not_ready``, ``outlierDesc`` and
        ``isOutlier`` set to 2. Otherwise a dict with ``isOutlier`` (1 if at
        least one record is in anomaly, else 0), ``anomaly_reason``,
        ``outlierStatus`` (``red`` / ``green``), ``isOutlierDesc``,
        ``anomaly_records`` (only when in anomaly), ``cim_field_green`` and
        ``cim_field_red`` (CIM fields per entity), ``models_processed`` and
        ``models_in_anomaly``.

    Raises:
        Exception: any failure while processing the records is re-raised as a
            plain ``Exception`` carrying the original message.
    """
    # get service
    service = client.connect(
        owner="nobody",
        app="trackme",
        port=splunkd_port,
        token=session_key,
        timeout=600,
    )

    # Data collection
    collection_name = (
        "kv_trackme_cim_outliers_entity_data" + "_tenant_" + str(tenant_id)
    )
    collection = service.kvstore[collection_name]

    # Define the KV query
    query_string = {
        "$and": [
            {
                "object_category": "splk-cim",
                "object": object_name,
            }
        ]
    }

    # Search for the kvrecords, a failing query is handled as "no results"
    try:
        kvrecords = collection.data.query(query=json.dumps(query_string))
    except Exception as e:
        logging.warning(
            f'tenant_id="{tenant_id}", object="{object_name}", outliers inspection, KV store query failed with exception="{str(e)}"'
        )
        kvrecords = None

    # process
    try:
        # not ready if there are no results yet
        if not kvrecords:
            logging.info(
                f'tenant_id="{tenant_id}", object="{object_name}", outliers inspection, there are no outliers data results for this object yet'
            )
            return {
                "outlierStatus": "not_ready",
                "outlierDesc": "Outliers models have not been processed yet",
                "isOutlier": 2,
            }

        # we have results, loop through the records and check the outliers status
        anomaly_records = []
        models_processed = []
        models_in_anomaly = []
        cim_fields_red = {}  # dict with entity as the key
        cim_fields_green = {}  # dict with entity as the key
        anomaly_global_status = 0
        anomaly_model_count = 0

        for kvrecord in kvrecords:
            logging.debug(
                f'tenant_id="{tenant_id}", object="{object_name}", outliers inspection, get anomaly outliers statuses record="{json.dumps(kvrecord)}"'
            )

            # get fields
            outliers_cim_entity = kvrecord.get("entity")
            outliers_cim_field = kvrecord.get("cim_field")
            outliers_cim_field_status = int(kvrecord.get("isOutlier"))
            outliers_cim_field_model_summary = json.loads(
                kvrecord.get("models_summary")
            )

            # Get the green and red lists of this entity, creating them on
            # first sight only: several records (one per CIM field) can share
            # the same entity, and their fields must accumulate rather than
            # overwrite each other.
            entity_fields_red = cim_fields_red.setdefault(outliers_cim_entity, [])
            entity_fields_green = cim_fields_green.setdefault(
                outliers_cim_entity, []
            )

            # if this record is in anomaly, impact the global outlier num status
            if outliers_cim_field_status == 1:
                anomaly_global_status = 1
                anomaly_model_count += 1
                # append to red fields
                if outliers_cim_field not in entity_fields_red:
                    entity_fields_red.append(outliers_cim_field)
            else:
                # append to green fields
                if outliers_cim_field not in entity_fields_green:
                    entity_fields_green.append(outliers_cim_field)

            # log debug
            logging.debug(
                f'tenant_id="{tenant_id}", object="{object_name}", cim_field="{outliers_cim_field}", outliers inspection, isOutlier="{outliers_cim_field_status}", model_summary="{json.dumps(outliers_cim_field_model_summary)}"'
            )

            # loop through models
            for model_id in outliers_cim_field_model_summary:
                # get the model dict, a model which cannot be retrieved is
                # logged and skipped
                try:
                    model_dict = outliers_cim_field_model_summary[model_id]
                    logging.debug(
                        f'tenant_id="{tenant_id}", object="{object_name}", cim_field="{outliers_cim_field}", outliers inspection, model_id="{model_id}", model_dict="{json.dumps(model_dict)}"'
                    )
                except Exception as e:
                    model_dict = None
                    logging.error(
                        f'tenant_id="{tenant_id}", object="{object_name}", cim_field="{outliers_cim_field}", outliers inspection, model_id="{model_id}", cannot get model_dict with exception="{str(e)}"'
                    )

                if not model_dict:
                    continue

                # append to the processed models
                models_processed.append(model_id)

                # from the model, get main information
                model_isOutlier = int(model_dict["isOutlier"])
                model_isOutlierReason = model_dict["isOutlierReason"]
                model_alert_lower_breached = model_dict["alert_lower_breached"]
                model_alert_upper_breached = model_dict["alert_upper_breached"]
                model_summary_search_results = model_dict["summary_search_results"]

                if model_isOutlier == 1:
                    anomaly_records.append(
                        {
                            "entity": outliers_cim_entity,
                            "cim_field": outliers_cim_field,
                            "model_id": model_id,
                            "isOutlierReason": model_isOutlierReason,
                            "alert_lower_breached": model_alert_lower_breached,
                            "model_alert_upper_breached": model_alert_upper_breached,
                            "model_summary_search_results": model_summary_search_results,
                        }
                    )
                    # append to models_in_anomaly
                    models_in_anomaly.append(model_id)

        # end of the inspection
        logging.debug(
            f'tenant_id="{tenant_id}", object="{object_name}", outliers inspection, anomaly_records="{json.dumps(anomaly_records)}"'
        )

        final_record = {
            "isOutlier": anomaly_global_status,
        }

        # add conditionally
        if anomaly_model_count > 0:
            final_record["anomaly_reason"] = (
                "ML Outliers detected: One of more ML models reported anomalies."
            )
            final_record["outlierStatus"] = "red"
            final_record["isOutlierDesc"] = (
                f"{anomaly_model_count} ML models are currently in anomaly, review the model results"
            )
            final_record["anomaly_records"] = anomaly_records
        else:
            final_record["anomaly_reason"] = "None"
            final_record["outlierStatus"] = "green"
            final_record["isOutlierDesc"] = (
                "there are no ML models currently in anomaly"
            )

        # fields per entity, in both cases
        final_record["cim_field_green"] = cim_fields_green
        final_record["cim_field_red"] = cim_fields_red

        # append models processed
        final_record["models_processed"] = models_processed

        # append models in anomaly
        final_record["models_in_anomaly"] = models_in_anomaly

        logging.debug(
            f'tenant_id="{tenant_id}", object="{object_name}", outliers inspection, final_record="{json.dumps(final_record)}"'
        )
        return final_record

    except Exception as e:
        raise Exception(str(e)) from e


# Return the splk-cim search string
def trackme_cim_return_search(
    tracker_name,
    tenant_id,
    object,
    account,
    cim_tracking_rules,
    cim_datamodel_name,
    cim_datamodel_nodename,
    cim_drop_dm_object_name,
    cim_root_constraint,
    cim_fields,
    earliest,
    latest,
    summariesonly,
):
    try:
        # load json as a dict
        cim_tracking_rules = json.loads(cim_tracking_rules)

        # store as a list
        cim_drop_dm_object_name_list = cim_drop_dm_object_name.split(",")
        cim_fields_list = cim_fields.split(",")

        # get breakby logic, and act accordingly
        cim_breakby_prefix = cim_tracking_rules["cim_breakby_prefix"]
        cim_breakby = cim_tracking_rules["cim_breakby"]

        # form the search
        tstats_search_list = []
        appendpipe_list = []
        search_base_str = (
            "tstats summariesonly="
            + str(summariesonly)
            + " count from datamodel="
            + str(cim_datamodel_name)
            + " where (nodename = "
            + str(cim_datamodel_nodename)
            + ")"
        )
        search_base_total_count_str = (
            "tstats summariesonly="
            + str(summariesonly)
            + " count as total_count from datamodel="
            + str(cim_datamodel_name)
            + " where (nodename = "
            + str(cim_datamodel_nodename)
            + ") prestats=t append=t"
        )

        # add the optional root constraint, if any
        if cim_root_constraint and len(cim_root_constraint) > 0:
            search_base_str = (
                str(search_base_str) + " AND (" + str(cim_root_constraint) + ") by "
            )
        else:
            search_base_str = str(search_base_str) + " by "

        # empty list objects
        search_break_by_list = []
        search_stats_list = []
        total_stats_list = []
        coverage_stats_list = []
        field_status_list = []

        # loop through the list of CIM fields, and create a dictionnary that will be used to generate the search
        search_dict = {}
        for field in cim_fields_list:
            # get the datamodel fieldname
            datamodel_fieldname = cim_tracking_rules[field][0]["datamodel_fieldname"]

            # get the min compliant coverage percentage
            min_compliant_coverage_percentage = cim_tracking_rules[field][1][
                "min_compliant_coverage_percentage"
            ]

            # get the max unknown coverage percentage
            max_unknown_coverage_percentage = cim_tracking_rules[field][2][
                "max_unknown_coverage_percentage"
            ]

            # get the regex validator rule
            regex_validator_rule = cim_tracking_rules[field][3]["regex_validator_rule"]

            # log debug
            logging.info(
                f'tenant_id="{tenant_id}", object="{object}", field="{field}", datamodel_fieldname="{datamodel_fieldname}"'
            )
            logging.info(
                f'tenant_id="{tenant_id}", object="{object}", field="{field}", min_compliant_coverage_percentage="{min_compliant_coverage_percentage}"'
            )
            logging.info(
                f'tenant_id="{tenant_id}", object="{object}", field="{field}", max_unknown_coverage_percentage="{max_unknown_coverage_percentage}"'
            )
            logging.info(
                f'tenant_id="{tenant_id}", object="{object}", field="{field}", regex_validator_rule="{regex_validator_rule}"'
            )

            dict_summary = {
                "cim_field": field,
                "search_break_by_str": datamodel_fieldname,
                "search_stats_str": "sum(eval(if("
                + field
                + '=="unknown", count, 0))) as '
                + field
                + "_count_unknown, sum(eval(if("
                + field
                + '!="unknown", count, 0))) as '
                + field
                + "_count_not_unknown, sum(eval(if(isnotnull("
                + field
                + ") AND match("
                + field
                + ', "'
                + str(regex_validator_rule.replace("\\\\", "\\"))
                + '"), count, 0))) as '
                + field
                + "_count_compliant",
                "total_stats_str": field
                + "_total=("
                + field
                + "_count_unknown+"
                + field
                + "_count_not_unknown)",
                "coverage_stats_str": field
                + "_pct_coverage_unknown=round((("
                + field
                + "_count_unknown/"
                + field
                + "_total)*100), 2), "
                + field
                + "_pct_coverage_compliant=round((("
                + field
                + "_count_compliant/"
                + field
                + "_total)*100), 2) | eval "
                + field
                + "_pct_coverage_unknown=if(isnum("
                + field
                + "_pct_coverage_unknown), "
                + field
                + "_pct_coverage_unknown, 0), "
                + field
                + "_pct_coverage_compliant=if(isnum("
                + field
                + "_pct_coverage_compliant), "
                + field
                + "_pct_coverage_compliant, 0)",
                "field_status_str": field
                + "_status=if("
                + field
                + "_pct_coverage_compliant<"
                + str(min_compliant_coverage_percentage)
                + " OR "
                + field
                + "_pct_coverage_unknown>"
                + str(max_unknown_coverage_percentage)
                + " OR isnull("
                + field
                + '_pct_coverage_compliant), "red", "green")',
            }

            # log debug
            logging.debug(
                f'tenant_id="{tenant_id}", object="{object}", field="{field}", dictionnary="{json.dumps(dict_summary)}"'
            )

            # append to the dict_summary
            search_dict[field] = dict_summary

        # loop through the cim fields
        for field in cim_fields_list:
            # define the tstats and appendpipe logic

            # differs if we have a break by logic or not
            if cim_breakby == "none":
                if len(tstats_search_list) == 0:
                    tstats_search_list.append(
                        search_base_str
                        + search_dict[field]["search_break_by_str"]
                        + " prestats=t"
                    )
                    appendpipe_list.append(
                        "appendpipe [ stats count by "
                        + search_dict[field]["search_break_by_str"]
                        + " ]"
                    )
                else:
                    tstats_search_list.append(
                        search_base_str
                        + search_dict[field]["search_break_by_str"]
                        + " prestats=t append=t"
                    )
                    appendpipe_list.append(
                        "appendpipe [ stats count by "
                        + search_dict[field]["search_break_by_str"]
                        + " ]"
                    )
            else:
                if len(tstats_search_list) == 0:
                    tstats_search_list.append(
                        search_base_str
                        + cim_breakby_prefix
                        + "."
                        + cim_breakby
                        + ", "
                        + search_dict[field]["search_break_by_str"]
                        + " prestats=t"
                    )
                    appendpipe_list.append(
                        "appendpipe [ stats count by "
                        + cim_breakby_prefix
                        + "."
                        + cim_breakby
                        + ", "
                        + search_dict[field]["search_break_by_str"]
                        + " ]"
                    )
                else:
                    tstats_search_list.append(
                        search_base_str
                        + cim_breakby_prefix
                        + "."
                        + cim_breakby
                        + ", "
                        + search_dict[field]["search_break_by_str"]
                        + " prestats=t append=t"
                    )
                    appendpipe_list.append(
                        "appendpipe [ stats count by "
                        + cim_breakby_prefix
                        + "."
                        + cim_breakby
                        + ", "
                        + search_dict[field]["search_break_by_str"]
                        + " ]"
                    )

            # rest of the logic
            search_break_by_list.append(search_dict[field]["search_break_by_str"])
            search_stats_list.append(search_dict[field]["search_stats_str"])
            total_stats_list.append(search_dict[field]["total_stats_str"])
            coverage_stats_list.append(search_dict[field]["coverage_stats_str"])
            field_status_list.append(search_dict[field]["field_status_str"])

        # form the complete search

        # part1: first pipe
        search_str = "| " + " | ".join(tstats_search_list)
        # add the total_count
        search_str = search_str + " | " + search_base_total_count_str
        search_str = search_str + "\n"

        # part2: appendpipe
        search_str = search_str + " | " + " | ".join(appendpipe_list)
        # add the total_count
        if cim_breakby == "none":
            search_str = (
                search_str + " | appendpipe [ stats count as total_count ]" + "\n"
            )
        else:
            search_str = (
                search_str
                + " | appendpipe [ stats count as total_count by "
                + cim_breakby_prefix
                + "."
                + cim_breakby
                + " ]"
                + "\n"
            )

        # part3: drop dm
        for drop_dm_name in cim_drop_dm_object_name_list:
            search_str = (
                search_str + '| `drop_dm_object_name("' + drop_dm_name + '")`\n'
            )

        # part4: first stats layer
        search_str = search_str + "| stats first(total_count) as count,\n"
        search_str = search_str + ", ".join(search_stats_list)
        # if breakby
        if cim_breakby == "none":
            search_str = search_str + "\n"
        else:
            search_str = search_str + " by " + cim_breakby + "\n"

        # part5: eval total
        search_str = search_str + "| eval "
        search_str = search_str + ", ".join(total_stats_list)
        search_str = search_str + "\n"

        # part6: eval coverage
        search_str = search_str + "| eval "
        search_str = search_str + ", ".join(coverage_stats_list)
        search_str = search_str + "\n"

        # part7: eval status
        search_str = search_str + "| eval "
        search_str = search_str + ", ".join(field_status_list)

        # final: if there is no break by, add a pseudo entity
        if cim_breakby == "none":
            search_str = search_str + ' | eval cim_entity_zone="global"'

        # handle local vs remote search

        # logging debug
        logging.info(f'tenant_id="{tenant_id}", object="{object}", account="{account}"')

        if account != "local":
            search_str = (
                '| splunkremotesearch account="'
                + str(account)
                + '", earliest="'
                + str(earliest)
                + '", latest="'
                + str(latest)
                + '" search="'
                + str(search_str.replace('"', '\\"'))
                + '" register_component="True" '
                + 'tenant_id="'
                + str(tenant_id)
                + '" component="splk-cim" report="'
                + str(tracker_name)
                + '"'
            )

        # Set the search
        return search_str

    except Exception as e:
        raise Exception(str(e))
