#!/usr/bin/env python
# coding=utf-8

__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2023-2025, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"

import os
import sys
import re
import json
import time
import logging
from logging.handlers import RotatingFileHandler
import urllib.parse
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home
splunkhome = os.environ["SPLUNK_HOME"]

# append lib
sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib"))

# import Splunk libs
import splunklib.client as client

# import trackme libs utils
from trackme_libs_utils import (
    decode_unicode,
    replace_encoded_doublebackslashes,
    replace_encoded_fourbackslashes,
    remove_leading_spaces,
)

# import TrackMe libs
from trackme_libs import JSONFormatter

# logging:
# To avoid overriding the logging destination of the callers, this library purposely does not
# configure any logging handler and relies on the logging configuration of the callers


# process and return main entity info
def splk_dsm_return_entity_info(object_dict):
    """
    Extract the main information of a splk-dsm entity from its object name.

    Args:
        object_dict: dict providing "object" (the entity name, a string) and
            optionally "search_mode".

    Returns:
        dict containing:
        - "account": "local", or the account captured from a leading
          "remote|account:<name>|" / "remoteraw|account:<name>|" prefix. The key
          is absent when the object is remote but carries no account segment.
        - "search_mode": the value of object_dict["search_mode"], None if missing.
        - "breakby_key" / "breakby_value": captured from "|key:<key>|<value>" or
          "|rawkey:<key>|<value>"; for "|cribl:<value>" the key is "cribl_pipe".
          Both are "none" when the object has no special key, and both are
          absent when a key marker is present without a "|<value>" part.
        - "breakby_statement": "index, sourcetype", only set when the object
          has no special key.

    Raises:
        TypeError: raised by the re module when "object" is missing or is not
            a string.
    """
    object_value = object_dict.get("object")
    response = {}

    #
    # extract the account
    #

    if re.search(r"^(?:remote|remoteraw)\|", object_value):
        match = re.search(r"^(?:remote|remoteraw)\|account:(\w*)\|", object_value)
        if match:
            response["account"] = match.group(1)
    else:
        response["account"] = "local"

    #
    # get and add the search_mode
    #

    response["search_mode"] = object_dict.get("search_mode")

    #
    # extract the break by statement and special key, if any
    #

    # tstats special key: |key:<key>|<value>
    if re.search(r"\|key\:", object_value):
        match = re.search(r"\|key\:([^\|]*)\|(.*)", object_value)
        if match:
            response["breakby_key"] = match.group(1)
            response["breakby_value"] = match.group(2)

    # raw special key: |rawkey:<key>|<value>
    elif re.search(r"\|rawkey\:", object_value):
        match = re.search(r"\|rawkey\:([^\|]*)\|(.*)", object_value)
        if match:
            response["breakby_key"] = match.group(1)
            response["breakby_value"] = match.group(2)

    # cribl special key: the previous pattern was not a valid regular expression
    # (unterminated character class) and targeted "rawkey" rather than "cribl",
    # so any cribl object raised re.error.
    # NOTE(review): assumes the object is formatted as "...|cribl:<pipe>" - confirm
    elif re.search(r"\|cribl\:", object_value):
        match = re.search(r"\|cribl\:(.*)", object_value)
        if match:
            response["breakby_key"] = "cribl_pipe"
            response["breakby_value"] = match.group(1)

    # no special key
    else:
        response["breakby_key"] = "none"
        response["breakby_value"] = "none"
        response["breakby_statement"] = "index, sourcetype"

    return response


# return if the entity is an Elastic Source, and return information
def splk_dsm_return_elastic_info(session_key, splunkd_port, tenant_id, object_value):
    """
    Tell whether an entity is an Elastic Source and return its definition.

    The shared then the dedicated Elastic KVstore collections of the tenant are
    both queried for a record whose "object" equals object_value. A shared
    record takes precedence over a dedicated one.

    Args:
        session_key: token used to connect to splunkd.
        splunkd_port: splunkd management port.
        tenant_id: tenant identifier, appended to the collection names.
        object_value: entity object name to look up.

    Returns:
        {"is_elastic": 0} when no usable record is found, otherwise a dict with
        "is_elastic" (1), "type_elastic" ("shared" or "dedicated"), "account",
        "search_mode" (the mode without its "remote_" prefix, None if unknown),
        "elastic_search_mode" (the mode as stored) and "search_constraint".
    """
    service = client.connect(
        owner="nobody",
        app="trackme",
        port=splunkd_port,
        token=session_key,
        timeout=600,
    )

    # stored search mode -> effective search mode, with and without remote_ prefix
    effective_modes = {
        stored_mode: base_mode
        for base_mode in ("tstats", "raw", "from", "mstats", "mpreview")
        for stored_mode in (base_mode, "remote_" + base_mode)
    }

    def lookup_record(collection_prefix):
        # Return (key, record) for the first matching record; any failure
        # (missing collection, no record, unexpected content) yields (None, None)
        try:
            collection = service.kvstore[collection_prefix + str(tenant_id)]
            record = collection.data.query(
                query=json.dumps({"object": object_value})
            )[0]
            record_key = record.get("_key")

            if re.match(r"^remote_", record.get("search_mode")):
                # remote constraints embed the account: account="x" | <constraint>
                parsed = re.match(
                    r"account=\\{0,1}\"{0,1}(\w+)\\{0,1}\"{0,1}\s{0,1}\|\s{0,1}(.*)",
                    record.get("search_constraint"),
                )
                if parsed:
                    record["account"] = parsed.group(1)
                    record["search_constraint"] = parsed.group(2)
            else:
                record["account"] = "local"
                record.setdefault("search_constraint", None)

        except Exception:
            return None, None

        return record_key, record

    # both collections are always queried, shared first
    shared_key, shared_record = lookup_record("kv_trackme_dsm_elastic_shared_tenant_")
    dedicated_key, dedicated_record = lookup_record(
        "kv_trackme_dsm_elastic_dedicated_tenant_"
    )

    if shared_key:
        type_elastic, record = "shared", shared_record
    elif dedicated_key:
        type_elastic, record = "dedicated", dedicated_record
    else:
        return {"is_elastic": 0}

    stored_mode = record.get("search_mode")
    elastic_info = {
        "is_elastic": 1,
        "type_elastic": type_elastic,
        "account": record.get("account"),
        "search_mode": effective_modes.get(stored_mode),
        "elastic_search_mode": stored_mode,
        "search_constraint": record.get("search_constraint"),
    }

    logging.debug(
        f'function=splk_dsm_return_elastic_info, elastic_type="{type_elastic}", elastic_info="{json.dumps(elastic_info, indent=2)}"'
    )
    return elastic_info


# return main searches logics for that entity
def splk_dsm_return_searches(tenant_id, object_value, entity_info):
    """
    Build the SPL searches used for a splk-dsm entity.

    Args:
        tenant_id: tenant identifier; concatenated as a string into the
            `trackme_metrics_idx` macro call of the metrics based searches.
        object_value: entity object name, used as the object="..." filter of
            the metrics based searches (concatenated as a string).
        entity_info: dict read for "search_mode" (required), "account" and
            "search_constraint" (required by every supported search mode).
            Supported search modes are "tstats", "raw", "from" (datamodel or
            lookup constraint), "mstats" and "mpreview".

    Returns:
        dict with the keys "splk_dsm_overview_root_search",
        "splk_dsm_overview_single_stats", "splk_dsm_overview_timechart",
        "splk_dsm_raw_search", "splk_dsm_sampling_search" and
        "splk_dsm_metrics_populate_search". For an unsupported search mode all
        of them but the last are None; the sampling search is "N/A" for the
        "from", "mstats" and "mpreview" modes.

    Raises:
        Exception: any error is logged then re-raised wrapped in a generic
            Exception.
    """
    logging.debug(
        f'Starting function=splk_dsm_return_searches with entity_info="{json.dumps(entity_info, indent=2)}"'
    )

    # statistics appended to the root search, identical for every search mode
    single_stats_suffix = " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"

    # timechart appended to the root search, depends on what the root search returns
    timechart_tstats_suffix = " | timechart `auto_span` sum(count) as events_count, avg(ingest_latency) as avg_latency, max(dcount_host) as dcount_host"
    timechart_events_suffix = " | timechart `auto_span` count as events_count, avg(ingest_latency) as avg_latency, dc(host) as dcount_host"
    timechart_metrics_suffix = " | timechart `auto_span` latest(count) as events_count, avg(ingest_latency) as avg_latency, max(dcount_host) as dcount_host"

    # a "from" constraint targeting a lookup
    lookup_pattern = r"lookup\:\"{0,1}"

    # closing part of any splunkremotesearch call
    remote_suffix = '" earliest="-24h" latest="now"'

    def remote_prefix():
        # opening part of a splunkremotesearch call, up to the search="..." value
        return (
            '| splunkremotesearch account="'
            + entity_info.get("account")
            + '" search="'
        )

    def local_constraint():
        # search constraint as used in a local search
        return replace_encoded_doublebackslashes(entity_info["search_constraint"])

    def remote_constraint():
        # search constraint as embedded in the search="..." value of a remote search
        return replace_encoded_fourbackslashes(
            entity_info["search_constraint"]
        ).replace('"', '\\"')

    def metrics_root_search():
        # root search relying on the metrics TrackMe generated for the entity
        return (
            "| mstats latest(_value) as value where `trackme_metrics_idx("
            + tenant_id
            + ')` (metric_name=trackme.splk.feeds.eventcount_4h OR metric_name=trackme.splk.feeds.lag_event_sec OR metric_name=trackme.splk.feeds.hostcount_4h) object_category="splk-dsm" object="'
            + object_value
            + '" by metric_name `auto_span` | eval {metric_name}=value'
            + "| stats first(trackme.splk.feeds.eventcount_4h) as count, first(trackme.splk.feeds.lag_event_sec) as ingest_latency, max(trackme.splk.feeds.hostcount_4h) as dcount_host by _time | eval event_delay=ingest_latency"
        )

    def events_drilldown_searches(is_local):
        # (raw search, sampling search) for the events based modes (tstats, raw)
        if is_local:
            return (
                "search?q=" + urllib.parse.quote(local_constraint()),
                "search " + local_constraint(),
            )
        remote_search = (
            remote_prefix() + remote_constraint() + "| head 1000" + remote_suffix
        )
        return "search?q=" + urllib.parse.quote(remote_search), remote_search

    def from_remote_raw_search():
        # raw search of a remote "from" entity (datamodel or lookup)
        return "search?q=" + urllib.parse.quote(
            remote_prefix()
            + " from "
            + remote_constraint()
            + "| head 1000"
            + remote_suffix
        )

    # define required searches dynamically based on the upstream entity information
    splk_dsm_overview_root_search = None
    splk_dsm_overview_single_stats = None
    splk_dsm_overview_timechart = None
    splk_dsm_raw_search = None
    splk_dsm_sampling_search = None

    try:
        search_mode = entity_info["search_mode"]
        is_local = entity_info.get("account") == "local"
        timechart_suffix = None

        ########
        # tstats
        ########

        if search_mode == "tstats":
            splk_dsm_overview_root_search = (
                "| tstats dc(host) as dcount_host count latest(_indextime) as indextime max(_time) as maxtime where "
                + entity_info["search_constraint"]
                + " by _time, index, sourcetype, host, splunk_server span=1s | eval ingest_latency=(indextime-_time), event_delay=(now() - maxtime)"
            )
            timechart_suffix = timechart_tstats_suffix
            splk_dsm_raw_search, splk_dsm_sampling_search = events_drilldown_searches(
                is_local
            )

        #####
        # raw
        #####

        elif search_mode == "raw":
            splk_dsm_overview_root_search = (
                entity_info["search_constraint"]
                + " | eventstats max(_time) as maxtime | eval ingest_latency=(_indextime-_time), event_delay=(now() - maxtime)"
            )
            timechart_suffix = timechart_events_suffix
            splk_dsm_raw_search, splk_dsm_sampling_search = events_drilldown_searches(
                is_local
            )

        ######
        # from
        ######

        # from datamodel
        elif search_mode == "from" and re.search(
            r"datamodel\:\"{0,1}", entity_info["search_constraint"]
        ):
            splk_dsm_overview_root_search = (
                "| from "
                + entity_info["search_constraint"]
                + "\n| eventstats max(_time) as maxtime"
                + "\n| eval ingest_latency=(_indextime-_time), event_delay=(now() - maxtime)"
            )
            timechart_suffix = timechart_events_suffix

            if is_local:
                splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
                    "| from " + local_constraint()
                )
            else:
                splk_dsm_raw_search = from_remote_raw_search()
            splk_dsm_sampling_search = "N/A"

        # from lookup
        elif search_mode == "from" and re.search(
            lookup_pattern, entity_info["search_constraint"]
        ):
            splk_dsm_overview_root_search = metrics_root_search()
            timechart_suffix = timechart_metrics_suffix

            if is_local:
                splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
                    "| from " + local_constraint() + " | head 1000"
                )
            else:
                splk_dsm_raw_search = from_remote_raw_search()
            splk_dsm_sampling_search = "N/A"

        ########
        # mstats
        ########

        elif search_mode == "mstats":
            splk_dsm_overview_root_search = metrics_root_search()
            timechart_suffix = timechart_metrics_suffix

            if is_local:
                splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
                    '| mpreview index=* filter=" '
                    + local_constraint()
                    + '" earliest="-15m" latest="now"'
                )
            else:
                # NOTE(review): the quotes of filter/earliest/latest are not
                # escaped inside the search="..." value - confirm this is
                # what splunkremotesearch expects
                splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
                    remote_prefix()
                    + ' | mpreview index=* filter=" '
                    + remote_constraint()
                    + '" earliest="-15m" latest="now" | head 1000'
                    + remote_suffix
                )
            splk_dsm_sampling_search = "N/A"

        ##########
        # mpreview
        ##########

        elif search_mode == "mpreview":
            splk_dsm_overview_root_search = (
                entity_info["search_constraint"]
                + " | eventstats max(_time) as maxtime | eval ingest_latency=(_indextime-_time), event_delay=(now() - maxtime)"
            )
            timechart_suffix = timechart_events_suffix

            if is_local:
                splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
                    "| mpreview "
                    + local_constraint()
                    + ' earliest="-15m" latest="now"'
                )
            else:
                # NOTE(review): same unescaped inner quotes as the mstats mode
                splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
                    remote_prefix()
                    + " | mpreview "
                    + remote_constraint()
                    + ' earliest="-15m" latest="now" | head 1000'
                    + remote_suffix
                )
            splk_dsm_sampling_search = "N/A"

        # an unsupported search mode builds no search: nothing to derive or wrap
        if splk_dsm_overview_root_search is not None:
            splk_dsm_overview_single_stats = (
                splk_dsm_overview_root_search + single_stats_suffix
            )
            splk_dsm_overview_timechart = (
                splk_dsm_overview_root_search + timechart_suffix
            )

            ###########
            # if remote
            ###########

            # for all searches except the raw event search definition; metrics
            # based searches (mstats, from lookup) always run locally.
            # The search mode is compared by equality: the former
            # `in ("mstats")` / `in ("from")` tests were substring tests
            # against a plain string, not tuple membership tests.
            if (
                not is_local
                and search_mode != "mstats"
                and not (
                    search_mode == "from"
                    and re.search(lookup_pattern, entity_info["search_constraint"])
                )
            ):
                splk_dsm_overview_root_search = (
                    remote_prefix()
                    + splk_dsm_overview_root_search.replace('"', '\\"')
                    + remote_suffix
                )

                splk_dsm_overview_single_stats = (
                    remote_prefix()
                    + splk_dsm_overview_single_stats.replace('"', '\\"')
                    + remote_suffix
                )

                # empty buckets are dropped remotely, the timechart is
                # then rebuilt locally from the remote results
                splk_dsm_overview_timechart = (
                    splk_dsm_overview_timechart + " | where isnotnull(events_count)"
                )
                splk_dsm_overview_timechart = (
                    remote_prefix()
                    + splk_dsm_overview_timechart.replace('"', '\\"')
                    + remote_suffix
                    + " | timechart `auto_span` first(events_count) as events_count, first(avg_latency) as avg_latency, first(dcount_host) as dcount_host"
                )

        # metrics populating search
        splk_dsm_metrics_populate_search = remove_leading_spaces(
            f"""\
                | mcatalog values(metric_name) as metrics where `trackme_metrics_idx({tenant_id})` tenant_id="{tenant_id}" object_category="splk-dsm" object="{object_value}" metric_name=*
                | mvexpand metrics
                | rename metrics as metric_name
                | rex field=metric_name "^trackme\\.splk\\.feeds\\.(?<label>.*)"
                | eval order=if(metric_name=="trackme.splk.feeds.status", 0, 1)
                | sort 0 order
                | fields - order
            """
        )

        # return
        response = {
            "splk_dsm_overview_root_search": splk_dsm_overview_root_search,
            "splk_dsm_overview_single_stats": splk_dsm_overview_single_stats,
            "splk_dsm_overview_timechart": splk_dsm_overview_timechart,
            "splk_dsm_raw_search": splk_dsm_raw_search,
            "splk_dsm_sampling_search": splk_dsm_sampling_search,
            "splk_dsm_metrics_populate_search": splk_dsm_metrics_populate_search,
        }

        logging.debug(
            f'function=splk_dsm_return_searches, response="{json.dumps(response, indent=2)}"'
        )
        return response

    except Exception as e:
        logging.error(
            f'function=splk_dsm_return_searches, an exception was encountered, exception="{str(e)}"'
        )
        raise Exception(e)


# process and return main entity info
def splk_dhm_return_entity_info(object_dict):
    """
    Extract the main information of a splk-dhm entity from its object name.

    Args:
        object_dict: dict providing "object" (the entity name, a string) and
            optionally "search_mode".

    Returns:
        dict containing:
        - "account": "local", or the account captured from a leading
          "remote|account:<name>|" / "remoteraw|account:<name>|" prefix. The key
          is absent when the object is remote but carries no account segment.
        - "search_mode": the value of object_dict["search_mode"], None if missing.
        - "breakby_key" / "breakby_value": captured from "key:<key>|<value>",
          "none" when the object contains no "key:" marker, and absent when the
          marker is present without a "|<value>" part.
        - "breakby_statement": "index, sourcetype", only set when the object
          has no special key.

    Raises:
        TypeError: raised by the re module when "object" is missing or is not
            a string.
    """
    object_value = object_dict.get("object")
    response = {}

    #
    # extract the account
    #

    if re.search(r"^(?:remote|remoteraw)\|", object_value):
        match = re.search(r"^(?:remote|remoteraw)\|account:(\w*)\|", object_value)
        if match:
            response["account"] = match.group(1)
    else:
        response["account"] = "local"

    #
    # get and add the search_mode
    #

    response["search_mode"] = object_dict.get("search_mode")

    #
    # extract the break by statement and special key, if any
    #

    # "rawkey:" ends with "key:", so this single pattern handles both the
    # tstats and the raw special keys; the former dedicated rawkey and
    # fallback branches could never be reached and were removed
    if re.search(r"key\:", object_value):
        match = re.search(r"key\:([^\|]*)\|(.*)", object_value)
        if match:
            response["breakby_key"] = match.group(1)
            response["breakby_value"] = match.group(2)

    # no special key
    else:
        response["breakby_key"] = "none"
        response["breakby_value"] = "none"
        response["breakby_statement"] = "index, sourcetype"

    return response


# return main searches logics for that entity
def splk_dhm_return_searches(tenant_id, object_value, entity_info):
    """
    Assemble the SPL searches backing a splk-dhm entity.

    Args:
        tenant_id: tenant identifier, injected in the metrics catalog search.
        object_value: entity object name, injected in the metrics catalog search.
        entity_info: dict read for "search_mode" (required), "account" and
            "search_constraint" (required by the "tstats" and "raw" modes, the
            only modes for which searches are built).

    Returns:
        dict with the keys "splk_dhm_overview_root_search",
        "splk_dhm_overview_timechart", "splk_dhm_overview_pie_root_search",
        "splk_dhm_raw_search" and "splk_dhm_metrics_populate_search".

    Raises:
        Exception: any error is logged then re-raised wrapped in a generic
            Exception.
    """
    logging.debug(
        f'Starting function=splk_dhm_return_searches with entity_info="{json.dumps(entity_info, indent=2)}"'
    )

    # closing part of any splunkremotesearch call
    remote_suffix = '" earliest="-24h" latest="now"'

    def remote_prefix():
        # opening part of a splunkremotesearch call, up to the search="..." value
        return (
            '| splunkremotesearch account="'
            + entity_info.get("account")
            + '" search="'
        )

    # searches remain undefined unless the search mode is supported
    root_search = None
    timechart_search = None
    pie_search = None
    raw_search = None

    try:
        mode = entity_info["search_mode"]
        runs_locally = entity_info.get("account") == "local"

        if mode in ("tstats", "raw"):
            # the decoded constraint is shared by all the overview searches
            constraint = replace_encoded_doublebackslashes(
                entity_info["search_constraint"]
            )

            if mode == "tstats":
                base_search = (
                    "| tstats count, max(_indextime) as indextime, max(_time) as maxtime where "
                    + constraint
                    + " by _time, index, sourcetype, splunk_server span=1s | eval ingest_latency=(indextime-_time), event_delay=(now() - maxtime)"
                )
                timechart_stats = " | timechart `auto_span` sum(count) as events_count, avg(ingest_latency) as avg_latency"
                pie_search = (
                    "| tstats count where " + constraint + " by index, sourcetype"
                )
            else:
                base_search = (
                    constraint
                    + " | eventstats max(_time) as maxtime | eval ingest_latency=(_indextime-_time), event_delay=now()-maxtime"
                )
                timechart_stats = " | timechart `auto_span` count as events_count, avg(ingest_latency) as avg_latency"
                pie_search = constraint + " | stats count by index, sourcetype"

            root_search = (
                base_search
                + " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"
            )
            timechart_search = base_search + timechart_stats

            # drilldown to the events, identical for both modes
            if runs_locally:
                raw_query = constraint
            else:
                raw_query = (
                    remote_prefix()
                    + replace_encoded_fourbackslashes(
                        entity_info["search_constraint"]
                    ).replace('"', '\\"')
                    + "| head 1000"
                    + remote_suffix
                )
            raw_search = "search?q=" + urllib.parse.quote(raw_query)

        # remote entities: every search but the raw event search is wrapped
        if not runs_locally and mode not in ("from", "mstats"):
            root_search = (
                remote_prefix() + root_search.replace('"', '\\"') + remote_suffix
            )

            # empty buckets are dropped remotely, the timechart is then
            # rebuilt locally from the remote results
            timechart_search = timechart_search + " | where isnotnull(events_count)"
            timechart_search = (
                remote_prefix()
                + timechart_search.replace('"', '\\"')
                + remote_suffix
                + " | timechart `auto_span` first(events_count) as events_count, first(avg_latency) as avg_latency"
            )

            pie_search = (
                remote_prefix() + pie_search.replace('"', '\\"') + remote_suffix
            )

        # metrics populating search
        metrics_populate_search = remove_leading_spaces(
            f"""\
                | mcatalog values(metric_name) as metrics where `trackme_metrics_idx({tenant_id})` tenant_id="{tenant_id}" object_category="splk-dhm" object="{object_value}" metric_name=*
                | mvexpand metrics
                | rename metrics as metric_name
                | rex field=metric_name "^trackme\\.splk\\.feeds\\.(?<label>.*)"
                | eval order=if(metric_name=="trackme.splk.feeds.status", 0, 1)
                | sort 0 order
                | fields - order
            """
        )

        return {
            "splk_dhm_overview_root_search": root_search,
            "splk_dhm_overview_timechart": timechart_search,
            "splk_dhm_overview_pie_root_search": pie_search,
            "splk_dhm_raw_search": raw_search,
            "splk_dhm_metrics_populate_search": metrics_populate_search,
        }

    except Exception as e:
        logging.error(
            f'function splk_dhm_return_searches, an exception was encountered, exception="{str(e)}"'
        )
        raise Exception(e)


# process and return main entity info
def splk_mhm_return_entity_info(object_dict):
    """
    Parse a splk-mhm object name into its account and break-by components.

    :param object_dict: dict exposing the entity name under the "object" key.
    :return: dict with the following keys:
        - "account": the name captured from a leading
          "remote|account:<name>|" / "remoteraw|account:<name>|" prefix, or
          "local" when the object has no remote prefix. The key is absent when
          a remote prefix is present but no account segment can be extracted.
        - "search_mode": always "mstats".
        - "breakby_key" / "breakby_value": the two parts of a
          "key:<name>|<value>" segment, or "none" / "none" when the object
          carries no such segment.
        - "breakby_statement": "index, sourcetype", only set on the fallback.
    :raises TypeError: if the "object" value is missing or not a string.
    """
    object_name = object_dict.get("object")
    response = {}

    #
    # extract the account
    #

    if re.search(r"^(?:remote|remoteraw)\|", object_name):
        account_match = re.search(
            r"^(?:remote|remoteraw)\|account:(\w*)\|", object_name
        )
        if account_match:
            response["account"] = account_match.group(1)
    else:
        response["account"] = "local"

    #
    # search_mode is always mstats for this component
    #

    response["search_mode"] = "mstats"

    #
    # extract the break by key and value, if any
    #

    # A single pattern is enough: "key:" also matches inside "rawkey:", and both
    # spellings yield the same captures. When the segment is absent or malformed
    # (for instance "key:env" without a value separator), fall back to the
    # defaults so that consumers can always rely on breakby_key / breakby_value.
    breakby_match = re.search(r"(?:key)\:([^\|]*)\|(.*)", object_name)
    if breakby_match:
        response["breakby_key"] = breakby_match.group(1)
        response["breakby_value"] = breakby_match.group(2)
    else:
        response["breakby_key"] = "none"
        response["breakby_value"] = "none"
        response["breakby_statement"] = "index, sourcetype"

    return response


# return main searches logics for that entity
def splk_mhm_return_searches(tenant_id, object_value, entity_info):
    """
    Build the drilldown search links of a splk-mhm entity.

    :param tenant_id: not used by this function.
    :param object_value: not used by this function.
    :param entity_info: dict read for the keys "breakby_key", "breakby_value",
        "search_constraint" and "account".
    :return: dict holding, for each of the mcatalog, metrics report and mpreview
        searches, the URL-quoted "search?q=..." link and its unquoted
        "<name>_litsearch" counterpart.
    :raises Exception: wraps any error met while building the searches.
    """
    logging.debug(
        f'Starting function=splk_mhm_return_searches with entity_info="{json.dumps(entity_info, indent=2)}"'
    )

    link_prefix = "search?q="

    try:

        def to_search_link(spl, remote_tail):
            # local accounts link to the search as-is, any other account gets the
            # search wrapped into a splunkremotesearch call closed by remote_tail
            if entity_info.get("account") == "local":
                return link_prefix + urllib.parse.quote(spl)
            return link_prefix + urllib.parse.quote(
                '| splunkremotesearch account="'
                + entity_info.get("account")
                + '" search="'
                + spl.replace('"', '\\"')
                + remote_tail
            )

        # the entity is split by host unless a custom break by key is set
        split_field = entity_info["breakby_key"]
        if split_field == "none":
            split_field = "host"

        # mcatalog
        mcatalog_link = to_search_link(
            "| mcatalog values(metric_name) as metrics, values(_dims) as dims where metric_name=* "
            + replace_encoded_doublebackslashes(entity_info["search_constraint"])
            + " by index",
            '| head 1000" earliest="-24h" latest="now"',
        )

        # metrics report
        metrics_report_link = to_search_link(
            "| mstats latest(_value) as value where metric_name=* "
            + replace_encoded_doublebackslashes(entity_info["search_constraint"])
            + " by metric_name, index, "
            + split_field
            + " span=1m | stats max(_time) as _time by metric_name, index, "
            + split_field
            + r' | rex field=metric_name "(?<metric_category>[^\.]*)\.{0,1}"'
            + " | stats values(metric_name) as metric_name, max(_time) as _time by metric_category, index, "
            + split_field
            + " | eval metric_current_lag_sec=(now() - _time)",
            '" earliest="-24h" latest="now"',
        )

        # mpreview, the filter is built on the break by key or defaults to host
        if entity_info["search_constraint"] == "none":
            mpreview_spl = '| mpreview index=* filter="host='
        else:
            mpreview_spl = (
                '| mpreview index=* filter="' + entity_info["breakby_key"] + "="
            )
        mpreview_link = to_search_link(
            mpreview_spl + entity_info["breakby_value"] + '"',
            '" earliest="-15m" latest="now"',
        )

        # each link is returned along with its human readable form
        searches = {}
        for name, link in (
            ("splk_mhm_mctalog_search", mcatalog_link),
            ("splk_mhn_metrics_report", metrics_report_link),
            ("splk_mhn_mpreview", mpreview_link),
        ):
            searches[name] = link
            searches[name + "_litsearch"] = urllib.parse.unquote(
                link.replace("search?q=", "")
            )
        return searches

    except Exception as e:
        logging.error(
            f'function splk_mhm_return_searches, an exception was encountered, exception="{str(e)}"'
        )
        raise Exception(e)


# return simulation search for splk-dsm hybrid trackers
def _simulation_object_eval(account, object_expression):
    """Return the "object = ..." eval assignment, tagged with the account when it is not local."""
    if account != "local":
        return 'object = "remote|account:' + account + '|" . ' + object_expression
    return "object = " + object_expression


def _simulation_summary_pipeline(rename_clause, index_alias, object_eval):
    """Return the SPL tail which renames fields, evaluates the object and samples up to 100 entities."""
    return (
        "\n| rename "
        + rename_clause
        + "\n| eval "
        + object_eval
        + "\n| stats values("
        + index_alias
        + ") as indexes, dc(object) as dcount_entities, values(object) as entities"
        + "\n| mvexpand entities | head 100 | stats values(indexes) as indexes, first(dcount_entities) as dcount_entities, values(entities) as entities_sample\n"
    )


def _simulation_events_search(
    simulation_info, root_filter, breakby_statement, object_eval
):
    """Return the tstats or raw simulation SPL for events components, None for any other search_mode."""
    search_mode = simulation_info["search_mode"]
    if search_mode not in ("tstats", "raw"):
        return None

    logging.info(f"Processing with search_mode={search_mode}")

    # root constraint restricted to the indexed time window
    filtered_root = (
        root_filter
        + simulation_info["search_constraint"]
        + " _index_earliest="
        + simulation_info["index_earliest_time"]
        + " _index_latest="
        + simulation_info["index_latest_time"]
    )

    if search_mode == "tstats":
        counting = (
            "| tstats count, dc(host) as dcount_host where " + filtered_root + " by "
        )
    else:
        counting = filtered_root + "\n| stats count, dc(host) as dcount_host by "

    return (
        counting
        + breakby_statement
        + _simulation_summary_pipeline(
            "index as data_index, sourcetype as data_sourcetype",
            "data_index",
            object_eval,
        )
    )


def splk_dsm_hybrid_tracker_simulation_return_searches(simulation_info):
    """
    Build the simulation search of a hybrid tracker (splk-dsm, splk-dhm or splk-mhm).

    :param simulation_info: dict read for the following keys:
        - "component": "dsm", "dhm" or "mhm" (compared strictly).
        - "breakby_field": "none" / "split" / "merged" or a comma separated
          list of fields for dsm, "host" / "none" or a field name for dhm / mhm.
        - "breakby_field_include_sourcetype": optional, dsm only, defaults to True.
        - "account": "local" or the name of a remote account.
        - "search_mode": "tstats" or "raw" (dsm and dhm only, mhm always runs mstats).
        - "search_constraint", "index_earliest_time", "index_latest_time".
        - "earliest_time", "latest_time": read for non local accounts only.
    :return: dict with a single "tracker_simulation_search" key. The value is
        None for a local account when the component or search_mode is not
        supported.
    :raises Exception: wraps any error met while building the search.
    """
    logging.debug(
        f'Starting function=splk_dsm_hybrid_tracker_simulation_return_searches with simulation_info="{json.dumps(simulation_info, indent=2)}"'
    )

    tracker_simulation_search = None

    try:
        # Strict equality on purpose: `x in ("dsm")` is a substring test against
        # the string "dsm" (parentheses alone do not make a tuple), which lets
        # values such as "ds" or "m" select the wrong component.
        component = simulation_info["component"]

        ####################
        # component splk-dsm
        ####################

        if component == "dsm":
            include_sourcetype = simulation_info.get(
                "breakby_field_include_sourcetype", True
            )
            breakby_field = simulation_info["breakby_field"]

            if breakby_field in ("none", "split"):
                breakby_statement = "index, sourcetype"
                object_expression = 'data_index . ":" . data_sourcetype'

            # strict equality here too, a custom field named "merge" is not "merged"
            elif breakby_field == "merged":
                breakby_statement = "index"
                object_expression = 'data_index . ":" . "@all"'

            else:
                # custom break by, the sourcetype part becomes "any" when excluded
                if include_sourcetype:
                    breakby_statement = "index, sourcetype, " + breakby_field
                    sourcetype_expression = "data_sourcetype"
                else:
                    breakby_statement = "index, " + breakby_field
                    sourcetype_expression = '"any"'

                # one or several fields: the key label joins the names with ";"
                # and the value concatenates the fields with a literal ";"
                object_expression = (
                    'data_index . ":" . '
                    + sourcetype_expression
                    + ' . "|key:" . "'
                    + breakby_field.replace(",", ";")
                    + '" . "|" . '
                    + ' . ";" . '.join(breakby_field.split(","))
                )

            tracker_simulation_search = _simulation_events_search(
                simulation_info,
                "(index=* OR index=_*) ",
                breakby_statement,
                _simulation_object_eval(simulation_info["account"], object_expression),
            )

        ####################
        # component splk-dhm
        ####################

        elif component == "dhm":
            host_field = simulation_info["breakby_field"]
            if host_field in ("host", "none"):
                host_field = "host"

            tracker_simulation_search = _simulation_events_search(
                simulation_info,
                '(index=* OR index=_*) (host=* host!="") ',
                "index, sourcetype, " + host_field,
                _simulation_object_eval(simulation_info["account"], host_field),
            )

        ####################
        # component splk-mhm
        ####################

        elif component == "mhm":
            host_field = simulation_info["breakby_field"]
            if host_field in ("host", "none"):
                host_field = "host"

            object_eval = _simulation_object_eval(
                simulation_info["account"], host_field
            )

            # splk-mhm only supports mstats
            logging.info("Processing with search_mode=mstats")
            tracker_simulation_search = (
                "| mstats latest(_value) as value where (index=* OR index=_*) (metric_name=*) "
                + simulation_info["search_constraint"]
                + " by "
                + "index, metric_name, "
                + host_field
                + _simulation_summary_pipeline(
                    "index as metric_index", "metric_index", object_eval
                )
            )

        ###########
        # if remote
        ###########

        # the search is wrapped into splunkremotesearch, inner double quotes escaped
        if simulation_info.get("account") != "local":
            tracker_simulation_search = (
                '| splunkremotesearch account="'
                + simulation_info.get("account")
                + '" search="'
                + tracker_simulation_search.replace('"', '\\"')
                + '" earliest="'
                + simulation_info.get("earliest_time")
                + '" latest="'
                + simulation_info.get("latest_time")
                + '" | fields - _raw'
            )

        logging.debug(f'tracker_simulation_search="{tracker_simulation_search}"')

        return {
            "tracker_simulation_search": tracker_simulation_search,
        }

    except Exception as e:
        logging.error(
            f'function splk_dsm_hybrid_tracker_simulation_return_searches, an exception was encountered, exception="{str(e)}"'
        )
        raise Exception(e)


def generate_dhm_report_search(
    entity_info,
    search_mode,
    tenant_id,
    account,
    index_earliest_time,
    index_latest_time,
    earliest_time,
    latest_time,
    root_constraint,
    dhm_tstats_root_breakby_include_splunk_server,
    dhm_tstats_root_time_span,
    breakby_field,
):
    """
    Build the SPL of a splk-dhm hybrid tracker report.

    :param entity_info: not used by this function.
    :param search_mode: "tstats" or "raw" (compared strictly).
    :param tenant_id: tenant identifier injected in the search and macro calls.
    :param account: "local", or the account handed to splunkremotesearch.
    :param index_earliest_time: value of _index_earliest (string).
    :param index_latest_time: value of _index_latest (string).
    :param earliest_time: earliest argument of splunkremotesearch (remote only).
    :param latest_time: latest argument of splunkremotesearch (remote only).
    :param root_constraint: root search constraint; double quotes are escaped
        when the account is not local.
    :param dhm_tstats_root_breakby_include_splunk_server: when truthy,
        splunk_server is part of the root tstats split by.
    :param dhm_tstats_root_time_span: span of the root tstats search.
    :param breakby_field: comma separated custom break by field(s); "none" or
        an empty value means host.
    :return: the report search string, as returned by remove_leading_spaces.
    :raises ValueError: if search_mode is neither "tstats" nor "raw".
    """
    # Validate with strict comparison: `search_mode in "tstats"` is a substring
    # test, so values such as "" or "t" used to select a branch by accident and
    # any other value ended with an UnboundLocalError on report_search.
    if search_mode not in ("tstats", "raw"):
        raise ValueError(
            f'function generate_dhm_report_search, unsupported search_mode="{search_mode}", expected "tstats" or "raw"'
        )
    is_tstats = search_mode == "tstats"
    is_remote = account != "local"

    #
    # breakby statement
    #

    # set breakby_field if none
    if breakby_field == "none":
        breakby_field = None

    if breakby_field:
        custom_fields = breakby_field.split(",")
        trackme_dhm_host_meta = str(breakby_field)
    else:
        custom_fields = ["host"]
        trackme_dhm_host_meta = "host"

    def _with_custom_fields(base_fields):
        # append the custom fields to the base ones, ignoring duplicates
        fields = list(base_fields)
        for field in custom_fields:
            if field not in fields:
                fields.append(field)
        return fields

    #
    # define trackme_root_splitby and trackme_aggreg_splitby
    #

    # root split by, as a csv list; splunk_server is optional
    trackme_root_splitby = ",".join(
        field
        for field in _with_custom_fields(["index", "sourcetype", "splunk_server"])
        if field != "splunk_server" or dhm_tstats_root_breakby_include_splunk_server
    )

    # aggreg split by (required for tstats searches), as a csv list
    trackme_aggreg_splitby = ",".join(_with_custom_fields(["index", "sourcetype"]))

    #
    # define search string aggreg
    #

    # the host value is prefixed with its key and, for remote trackers, with the
    # tracker type and account (double quotes escaped for splunkremotesearch)
    if is_remote:
        host_eval = (
            ' | eval host=\\"'
            + ("remote" if is_tstats else "remoteraw")
            + "|account:"
            + str(account.replace('"', ""))
            + "|key:"
            + str(trackme_dhm_host_meta)
            + '|\\" . '
            + str(trackme_dhm_host_meta)
        )
    else:
        host_eval = (
            ' | eval host="key:'
            + str(trackme_dhm_host_meta)
            + '|" . '
            + str(trackme_dhm_host_meta)
        )

    search_string_aggreg = (
        "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
        + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
        + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
        + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
        + "sum(data_eventcount) as data_eventcount by "
        + str(trackme_aggreg_splitby)
        + "\n"
        + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
        + host_eval
    )

    #
    # report search
    #

    # NOTE(review): these double quotes are not escaped in the remote variants
    # although they end up inside the search="..." argument - confirm intended
    index_window = (
        ' _index_earliest="'
        + index_earliest_time
        + '" _index_latest="'
        + index_latest_time
        + '"'
    )

    if is_tstats:
        tstats_aggregates = "| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
        if is_remote:
            # NOTE(review): dcount_host is not computed in this variant although
            # it is averaged further down - confirm intended
            root_search = (
                tstats_aggregates
                + 'count as data_eventcount where (host=* host!=\\"\\") '
                + str(root_constraint.replace('"', '\\"'))
            )
        else:
            root_search = (
                tstats_aggregates
                + 'count as data_eventcount, dc(host) as dcount_host where (host=* host!="") '
                + str(root_constraint)
            )

        inner_search = (
            root_search
            + index_window
            + " by _time,"
            + str(trackme_root_splitby)
            + " span="
            + str(dhm_tstats_root_time_span)
            + "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
            + "\n``` intermediate calculation ```"
            + "\n| bucket _time span=1m"
            + "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount by _time,"
            + str(trackme_aggreg_splitby)
            + "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
            + str(trackme_aggreg_splitby)
            + " | eval spantime=if(spantime>=(now()-300), spantime, null())"
            + "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
            + str(trackme_aggreg_splitby)
        )

    else:
        if is_remote:
            root_search = 'search (host=* host!=\\"\\") ' + str(
                root_constraint.replace('"', '\\"')
            )
            event_counters = (
                "count as data_eventcount, dc(host) as dcount_host by _time,"
            )
        else:
            root_search = str(root_constraint) + ' (host=* host!="")'
            event_counters = "count as data_eventcount by _time,"

        inner_search = (
            root_search
            + index_window
            + "\n| eval data_last_ingestion_lag_seen=(_indextime-_time)"
            + "\n``` intermediate calculation ```"
            + "\n| bucket _time span=1m"
            + "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
            + event_counters
            + str(trackme_aggreg_splitby)
            + "\n| eval spantime=data_last_ingest | eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m by spantime,"
            + str(trackme_aggreg_splitby)
        )

    inner_search = inner_search + "\n| " + str(search_string_aggreg)

    if is_remote:
        # the whole search runs through splunkremotesearch
        report_search = (
            '| splunkremotesearch account="'
            + str(account)
            + '"'
            + ' search="'
            + inner_search
            + '" earliest="'
            + str(earliest_time)
            + '" '
            + 'latest="'
            + str(latest_time)
            + '" tenant_id="'
            + str(tenant_id)
            + '" component="splk-dhm"'
        )
        tenant_comment = "\n``` set tenant_id ```" if is_tstats else "\n``` tenant_id ```"
        # NOTE(review): unlike the local variants, the macro is called without a
        # leading pipe - confirm intended
        macro_call = "\n`trackme_dhm_tracker_abstract("
    else:
        report_search = inner_search
        tenant_comment = "\n``` tenant_id ```" if is_tstats else "\n``` tenant_id ```\n"
        macro_call = "\n| `trackme_dhm_tracker_abstract("

    report_search = (
        report_search
        + tenant_comment
        + '\n| eval tenant_id="'
        + str(tenant_id)
        + '"'
        + "\n``` call the abstract macro ```"
        + macro_call
        + str(tenant_id)
        + ", "
        + search_mode
        + ")`"
    )

    #
    # finalize the search
    #

    report_search = remove_leading_spaces(
        f"""\
        {report_search}
        ``` collects latest collection state into the summary index ```
        | `trackme_collect_state("current_state_tracking:splk-dhm:{tenant_id}", "object", "{tenant_id}")`

        ``` output flipping change status if changes ```
        | trackmesplkgetflipping tenant_id="{tenant_id}" object_category="splk-dhm"
        | `trackme_outputlookup(trackme_dhm_tenant_{tenant_id}, key, {tenant_id})`
        | `trackme_mcollect(object, splk-dhm, "metric_name:trackme.splk.feeds.avg_eventcount_5m=avg_eventcount_5m, metric_name:trackme.splk.feeds.latest_eventcount_5m=latest_eventcount_5m, metric_name:trackme.splk.feeds.perc95_eventcount_5m=perc95_eventcount_5m, metric_name:trackme.splk.feeds.stdev_eventcount_5m=stdev_eventcount_5m, metric_name:trackme.splk.feeds.avg_latency_5m=avg_latency_5m, metric_name:trackme.splk.feeds.latest_latency_5m=latest_latency_5m, metric_name:trackme.splk.feeds.perc95_latency_5m=perc95_latency_5m, metric_name:trackme.splk.feeds.stdev_latency_5m=stdev_latency_5m, metric_name:trackme.splk.feeds.eventcount_4h=data_eventcount, metric_name:trackme.splk.feeds.lag_event_sec=data_last_lag_seen, metric_name:trackme.splk.feeds.lag_ingestion_sec=data_last_ingestion_lag_seen", "tenant_id, object_category, object", "{tenant_id}")`
        """
    )

    return report_search


# Usage (keyword arguments match the signature below, values are examples):
# report_search = generate_dsm_report_search(
#     entity_info=entity_info,
#     search_mode='tstats',
#     tenant_id='tenant1',
#     account='local',
#     index_earliest_time='-24h',
#     index_latest_time='now',
#     earliest_time='-5m',
#     latest_time='now',
#     root_constraint='index=*',
#     dsm_tstats_root_time_span='1m',
#     breakby_field='none',
#     dsm_tstats_root_breakby_include_splunk_server=True,
#     dsm_tstats_root_breakby_include_host=True,
# )


def generate_dsm_report_search(
    entity_info,
    search_mode,
    tenant_id,
    account,
    index_earliest_time,
    index_latest_time,
    earliest_time,
    latest_time,
    root_constraint,
    dsm_tstats_root_time_span,
    breakby_field,
    dsm_tstats_root_breakby_include_splunk_server,
    dsm_tstats_root_breakby_include_host,
):
    #
    logging.debug(
        f"retrieving search with function generate_dsm_report_search, search_mode={search_mode}, tenant_id={tenant_id}, account={account}, index_earliest_time={index_earliest_time}, index_latest_time={index_latest_time}, earliest_time={earliest_time}, latest_time={latest_time}, root_constraint={root_constraint}, dsm_tstats_root_time_span={dsm_tstats_root_time_span}, breakby_field={breakby_field}, dsm_tstats_root_breakby_include_splunk_server={dsm_tstats_root_breakby_include_splunk_server}, dsm_tstats_root_breakby_include_host={dsm_tstats_root_breakby_include_host}"
    )

    #
    # breakby statement
    #

    # set breakby_field if none
    if breakby_field == "none":
        breakby_field = None

    #
    # define trackme_root_splitby and trackme_aggreg_splitby
    #

    breakby_field_list = ["index", "sourcetype", "splunk_server", "host"]

    # default for breakby_field_include_sourcetype
    breakby_field_include_sourcetype = True

    if breakby_field and breakby_field != "merged":

        # if sourcetype in entity_info is set to *, then breakby_field_include_sourcetype is False
        if entity_info["sourcetype"] == "*":
            breakby_field_include_sourcetype = False

        custom_breakby_field_list = breakby_field.split(",")
        for field in custom_breakby_field_list:
            if not field in breakby_field_list:
                breakby_field_list.append(field)

    # translate into a csv list while handling a few more options
    trackme_root_splitby = []
    for field in breakby_field_list:
        if field in ("index", "sourcetype"):
            trackme_root_splitby.append(field)
        elif field == "splunk_server":
            if dsm_tstats_root_breakby_include_splunk_server:
                trackme_root_splitby.append(field)
        elif field == "host":
            if dsm_tstats_root_breakby_include_host:
                trackme_root_splitby.append(field)
        else:
            trackme_root_splitby.append(field)

    # return as csv list
    trackme_root_splitby = ",".join(trackme_root_splitby)

    # aggreg split by (required for tstats searches)
    trackme_aggreg_splitby_list = ["index", "sourcetype"]
    if breakby_field and breakby_field != "merged":
        custom_breakby_field_list = breakby_field.split(",")
        for field in custom_breakby_field_list:
            if not field in trackme_aggreg_splitby_list:
                trackme_aggreg_splitby_list.append(field)

    # if entity_info["sourcetype"] is set to *, then remove sourcetype from trackme_aggreg_splitby_list
    if entity_info["sourcetype"] == "*":
        trackme_aggreg_splitby_list.remove("sourcetype")

    # translates into a csv list
    trackme_aggreg_splitby = ",".join(trackme_aggreg_splitby_list)

    # set tracker_type
    if account == "local":
        tracker_type = "local"
    else:
        tracker_type = "remote"

    #
    # define search string aggreg
    #

    if tracker_type == "local":
        if breakby_field:
            if breakby_field == "merged":
                # remove sourcetype
                trackme_aggreg_splitby_list = []
                trackme_aggreg_splitby_list = trackme_aggreg_splitby.split(",")
                if "sourcetype" in trackme_aggreg_splitby_list:
                    trackme_aggreg_splitby_list.remove("sourcetype")
                trackme_aggreg_splitby = ",".join(trackme_aggreg_splitby_list)

                # set object definition
                object_definition = ' | eval object=data_index . ":@all"'

                if search_mode in "tstats":
                    search_string_aggreg = (
                        "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                        + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                        + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                        + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                        + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                        + "sum(data_eventcount) as data_eventcount by "
                        + str(trackme_aggreg_splitby)
                        + "\n| eval dcount_host=round(latest_dcount_host_5m, 2)"
                        + "\n| eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                        + '\n| rename index as data_index | eval data_sourcetype="all"'
                        + object_definition
                    )

                elif search_mode in "raw":
                    search_string_aggreg = (
                        "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                        + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                        + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                        + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                        + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                        + "sum(data_eventcount) as data_eventcount by "
                        + str(trackme_aggreg_splitby)
                        + "\n"
                        + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                        + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
                        + " | rename index as data_index\n"
                        + object_definition
                    )

            else:
                if search_mode in "tstats":
                    # support multiple fields
                    break_by_field = breakby_field.split(",")

                    if len(break_by_field) == 1:

                        # sourcetype to any with a custom breakby
                        if not breakby_field_include_sourcetype:
                            object_definition = (
                                ' | eval object=data_index . ":" . "any" . "|key:" . "'
                                + str(breakby_field)
                                + '" . "|" . '
                                + str(breakby_field)
                            )

                        # otherwise
                        else:
                            object_definition = (
                                ' | eval object=data_index . ":" . data_sourcetype . "|key:" . "'
                                + str(breakby_field)
                                + '" . "|" . '
                                + str(breakby_field)
                            )

                    else:

                        # sourcetype to any with a custom breakby
                        if not breakby_field_include_sourcetype:
                            object_definition = (
                                ' | eval object=data_index . ":" . "any" . "|key:" . "'
                                + str(breakby_field).replace(",", ";")
                                + '" . "|"'
                            )

                        # otherwise
                        else:
                            object_definition = (
                                ' | eval object=data_index . ":" . data_sourcetype . "|key:" . "'
                                + str(breakby_field).replace(",", ";")
                                + '" . "|"'
                            )

                        append_count = 0
                        for subbreak_by_field in break_by_field:
                            if append_count == 0:
                                object_definition = (
                                    object_definition + " . " + subbreak_by_field
                                )
                            else:
                                object_definition = (
                                    object_definition
                                    + " . "
                                    + '";"'
                                    + " . "
                                    + subbreak_by_field
                                )
                            append_count += 1

                    # search string aggreg
                    if not breakby_field_include_sourcetype:
                        search_string_aggreg = (
                            "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                            + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                            + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                            + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                            + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                            + "sum(data_eventcount) as data_eventcount by "
                            + str(trackme_aggreg_splitby)
                            + "\n| eval dcount_host=round(latest_dcount_host_5m, 2)"
                            + "\n| eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                            + '\n| rename index as data_index | eval data_sourcetype="any"'
                            + object_definition
                        )

                    else:
                        search_string_aggreg = (
                            "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                            + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                            + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                            + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                            + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                            + "sum(data_eventcount) as data_eventcount by "
                            + str(trackme_aggreg_splitby)
                            + "\n| eval dcount_host=round(latest_dcount_host_5m, 2)"
                            + "\n| eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                            + "\n| rename index as data_index, sourcetype as data_sourcetype"
                            + object_definition
                        )

                elif search_mode in "raw":
                    # support multiple fields
                    break_by_field = breakby_field.split(",")

                    if len(break_by_field) == 1:

                        # sourcetype to any with a custom breakby
                        if not breakby_field_include_sourcetype:
                            object_definition = (
                                ' | eval object=data_index . ":" . "any" . "|rawkey:" . "'
                                + str(breakby_field)
                                + '" . "|" . '
                                + str(breakby_field)
                            )

                        # otherwise
                        else:
                            object_definition = (
                                ' | eval object=data_index . ":" . data_sourcetype . "|rawkey:" . "'
                                + str(breakby_field)
                                + '" . "|" . '
                                + str(breakby_field)
                            )

                    else:

                        # sourcetype to any with a custom breakby
                        if not breakby_field_include_sourcetype:
                            object_definition = (
                                ' | eval object=data_index . ":" . "any" . "|rawkey:" . "'
                                + str(breakby_field).replace(",", ";")
                                + '" . "|"'
                            )

                        # otherwise
                        else:
                            object_definition = (
                                ' | eval object=data_index . ":" . data_sourcetype . "|rawkey:" . "'
                                + str(breakby_field).replace(",", ";")
                                + '" . "|"'
                            )

                        append_count = 0
                        for subbreak_by_field in break_by_field:
                            if append_count == 0:
                                object_definition = (
                                    object_definition + " . " + subbreak_by_field
                                )
                            else:
                                object_definition = (
                                    object_definition
                                    + " . "
                                    + '";"'
                                    + " . "
                                    + subbreak_by_field
                                )
                            append_count += 1

                    # search string aggreg
                    if not breakby_field_include_sourcetype:
                        search_string_aggreg = (
                            "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                            + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                            + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                            + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                            + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                            + "sum(data_eventcount) as data_eventcount by "
                            + str(trackme_aggreg_splitby)
                            + "\n"
                            + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                            + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
                            + ' | rename index as data_index | eval data_sourcetype="any"\n'
                            + object_definition
                        )

                    else:
                        search_string_aggreg = (
                            "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                            + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                            + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                            + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                            + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                            + "sum(data_eventcount) as data_eventcount by "
                            + str(trackme_aggreg_splitby)
                            + "\n"
                            + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                            + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
                            + " | rename index as data_index, sourcetype as data_sourcetype\n"
                            + object_definition
                        )

        else:
            if search_mode in "tstats":
                search_string_aggreg = (
                    "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                    + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                    + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                    + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                    + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                    + "sum(data_eventcount) as data_eventcount by "
                    + str(trackme_aggreg_splitby)
                    + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                    + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                    + " | rename index as data_index, sourcetype as data_sourcetype"
                    + ' | eval object=data_index . ":" . data_sourcetype'
                )

            elif search_mode in "raw":
                search_string_aggreg = (
                    "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                    + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                    + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                    + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                    + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                    + "sum(data_eventcount) as data_eventcount by "
                    + str(trackme_aggreg_splitby)
                    + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                    + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                    + " | rename index as data_index, sourcetype as data_sourcetype"
                    + ' | eval object=data_index . ":" . data_sourcetype'
                )

    elif tracker_type == "remote":
        if breakby_field:
            if breakby_field == "merged":
                # remove sourcetype
                trackme_aggreg_splitby_list = []
                trackme_aggreg_splitby_list = trackme_aggreg_splitby.split(",")
                if "sourcetype" in trackme_aggreg_splitby_list:
                    trackme_aggreg_splitby_list.remove("sourcetype")
                trackme_aggreg_splitby = ",".join(trackme_aggreg_splitby_list)

                object_definition = (
                    ' | eval object=\\"remote|account:'
                    + str(account.replace('"', ""))
                    + '|\\" . data_index . \\":@all\\"'
                )

                if search_mode in "tstats":
                    search_string_aggreg = (
                        "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                        + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                        + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                        + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                        + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                        + "sum(data_eventcount) as data_eventcount by "
                        + str(trackme_aggreg_splitby)
                        + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                        + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                        + ' | rename index as data_index | eval data_sourcetype=\\"all\\"'
                        + object_definition
                    )

                elif search_mode in "raw":
                    search_string_aggreg = (
                        "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                        + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                        + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                        + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                        + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                        + "sum(data_eventcount) as data_eventcount by "
                        + str(trackme_aggreg_splitby)
                        + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                        + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                        + ' | rename index as data_index | eval data_sourcetype=\\"all\\"'
                        + object_definition
                    )

            else:
                if search_mode in "tstats":
                    # support multiple fields
                    break_by_field = breakby_field.split(",")

                    if len(break_by_field) == 1:

                        # sourcetype to any with a custom breakby
                        if not breakby_field_include_sourcetype:
                            object_definition = (
                                ' | eval object=\\"remote|account:'
                                + str(account.replace('"', ""))
                                + '|\\" . data_index . \\":\\" . \\"any\\" . \\"|key:\\" . \\"'
                                + str(breakby_field)
                                + '\\" . \\"|\\" . '
                                + str(breakby_field)
                            )

                        # otherwise
                        else:
                            object_definition = (
                                ' | eval object=\\"remote|account:'
                                + str(account.replace('"', ""))
                                + '|\\" . data_index . \\":\\" . data_sourcetype . \\"|key:\\" . \\"'
                                + str(breakby_field)
                                + '\\" . \\"|\\" . '
                                + str(breakby_field)
                            )

                    else:

                        # sourcetype to any with a custom breakby
                        if not breakby_field_include_sourcetype:
                            object_definition = (
                                ' | eval object=\\"remote|account:'
                                + str(account.replace('"', ""))
                                + '|\\" . data_index . \\":\\" . \\"any\\" . \\"|key:\\" . \\"'
                                + str(breakby_field).replace(",", ";")
                                + '\\" . \\"|\\" . '
                            )

                        # otherwise
                        else:
                            object_definition = (
                                ' | eval object=\\"remote|account:'
                                + str(account.replace('"', ""))
                                + '|\\" . data_index . \\":\\" . data_sourcetype . \\"|key:\\" . \\"'
                                + str(breakby_field).replace(",", ";")
                                + '\\" . \\"|\\" . '
                            )

                        append_count = 0
                        for subbreak_by_field in break_by_field:
                            if append_count == 0:
                                object_definition = (
                                    object_definition + " . " + subbreak_by_field
                                )
                            else:
                                object_definition = (
                                    object_definition
                                    + " . "
                                    + '\\";\\"'
                                    + " . "
                                    + subbreak_by_field
                                )
                            append_count += 1

                    # search string aggreg
                    if not breakby_field_include_sourcetype:
                        search_string_aggreg = (
                            "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                            + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                            + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                            + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                            + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                            + "sum(data_eventcount) as data_eventcount by "
                            + str(trackme_aggreg_splitby)
                            + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                            + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                            + ' | rename index as data_index | eval data_sourcetype=\\"any\\"'
                            + object_definition
                        )

                    else:
                        search_string_aggreg = (
                            "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                            + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                            + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                            + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                            + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                            + "sum(data_eventcount) as data_eventcount by "
                            + str(trackme_aggreg_splitby)
                            + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                            + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                            + " | rename index as data_index, sourcetype as data_sourcetype"
                            + object_definition
                        )

                elif search_mode in "raw":
                    # support multiple fields
                    break_by_field = breakby_field.split(",")

                    if len(break_by_field) == 1:

                        # sourcetype to any with a custom breakby
                        if not breakby_field_include_sourcetype:
                            object_definition = (
                                ' | eval object=\\"remote|account:'
                                + str(account.replace('"', ""))
                                + '|\\" . data_index . \\":\\" . \\"any\\" . \\"|rawkey:\\" . \\"'
                                + str(breakby_field)
                                + '\\" . \\"|\\" . '
                                + str(breakby_field)
                            )

                        # otherwise
                        else:
                            object_definition = (
                                ' | eval object=\\"remote|account:'
                                + str(account.replace('"', ""))
                                + '|\\" . data_index . \\":\\" . data_sourcetype . \\"|rawkey:\\" . \\"'
                                + str(breakby_field)
                                + '\\" . \\"|\\" . '
                                + str(breakby_field)
                            )

                    else:

                        # sourcetype to any with a custom breakby
                        if not breakby_field_include_sourcetype:
                            object_definition = (
                                ' | eval object=\\"remote|account:'
                                + str(account.replace('"', ""))
                                + '|\\" . data_index . \\":\\" . \\"any\\" . \\"|rawkey:\\" . \\"'
                                + str(breakby_field).replace(",", ";")
                                + '\\" . \\"|\\" . '
                            )

                        # otherwise
                        else:
                            object_definition = (
                                ' | eval object=\\"remote|account:'
                                + str(account.replace('"', ""))
                                + '|\\" . data_index . \\":\\" . data_sourcetype . \\"|rawkey:\\" . \\"'
                                + str(breakby_field).replace(",", ";")
                                + '\\" . \\"|\\" . '
                            )

                        append_count = 0
                        for subbreak_by_field in break_by_field:
                            if append_count == 0:
                                object_definition = (
                                    object_definition + " . " + subbreak_by_field
                                )
                            else:
                                object_definition = (
                                    object_definition
                                    + " . "
                                    + '\\";\\"'
                                    + " . "
                                    + subbreak_by_field
                                )
                            append_count += 1

                    # search string aggreg
                    if not breakby_field_include_sourcetype:
                        search_string_aggreg = (
                            "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                            + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                            + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                            + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                            + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                            + "sum(data_eventcount) as data_eventcount by "
                            + str(trackme_aggreg_splitby)
                            + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                            + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                            + ' | rename index as data_index | eval data_sourcetype=\\"any\\"'
                            + object_definition
                        )

                    else:
                        search_string_aggreg = (
                            "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                            + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                            + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                            + "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                            + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                            + "sum(data_eventcount) as data_eventcount by "
                            + str(trackme_aggreg_splitby)
                            + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                            + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                            + " | rename index as data_index, sourcetype as data_sourcetype"
                            + object_definition
                        )

        else:
            if search_mode in "tstats":
                search_string_aggreg = (
                    "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                    + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                    + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                    + " max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                    + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                    + "sum(data_eventcount) as data_eventcount by "
                    + str(trackme_aggreg_splitby)
                    + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                    + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                    + " | rename index as data_index, sourcetype as data_sourcetype"
                    + ' | eval object=\\"remote|account:'
                    + str(account.replace('"', ""))
                    + '|\\" . data_index . \\":\\" . data_sourcetype'
                )

            elif search_mode in "raw":
                search_string_aggreg = (
                    "stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
                    + "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
                    + "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
                    + " max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
                    + "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
                    + "sum(data_eventcount) as data_eventcount by "
                    + str(trackme_aggreg_splitby)
                    + " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
                    + " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
                    + " | rename index as data_index, sourcetype as data_sourcetype"
                    + ' | eval object=\\"remoteraw|account:'
                    + str(account.replace('"', ""))
                    + '|\\" . data_index . \\":\\" . data_sourcetype'
                )

    # report search
    if tracker_type == "local":
        if search_mode in "tstats":
            if dsm_tstats_root_breakby_include_host:
                report_search = (
                    "| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
                    + "count as data_eventcount where "
                    + str(root_constraint)
                    + ' _index_earliest="'
                    + index_earliest_time
                    + '" _index_latest="'
                    + index_latest_time
                    + '"'
                    + " by _time,"
                    + str(trackme_root_splitby)
                    + " span="
                    + str(dsm_tstats_root_time_span)
                    + "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
                    + "\n``` intermediate calculation ```"
                    + "\n| bucket _time span=1m"
                    + "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount, dc(host) as dcount_host by _time,"
                    + str(trackme_aggreg_splitby)
                    + "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
                    + str(trackme_aggreg_splitby)
                    + " | eval spantime=if(spantime>=(now()-300), spantime, null())"
                    + "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
                    + str(trackme_aggreg_splitby)
                    + "\n| "
                    + str(search_string_aggreg)
                    + "\n``` tenant_id ```"
                    + '\n| eval tenant_id="'
                    + str(tenant_id)
                    + '"'
                    + "\n``` call the abstract macro ```"
                    + "\n`trackme_dsm_tracker_abstract("
                    + str(tenant_id)
                    + ", tstats)`"
                )

            else:
                report_search = (
                    "| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
                    + "count as data_eventcount, dc(host) as dcount_host where "
                    + str(root_constraint)
                    + ' _index_earliest="'
                    + index_earliest_time
                    + '" _index_latest="'
                    + index_latest_time
                    + '"'
                    + " by _time,"
                    + str(trackme_root_splitby)
                    + " span="
                    + str(dsm_tstats_root_time_span)
                    + "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
                    + "\n``` intermediate calculation ```"
                    + "\n| bucket _time span=1m"
                    + "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount, max(dcount_host) as dcount_host by _time,"
                    + str(trackme_aggreg_splitby)
                    + "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
                    + str(trackme_aggreg_splitby)
                    + " | eval spantime=if(spantime>=(now()-300), spantime, null())"
                    + "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
                    + str(trackme_aggreg_splitby)
                    + "\n| "
                    + str(search_string_aggreg)
                    + "\n``` tenant_id ```"
                    + '\n| eval tenant_id="'
                    + str(tenant_id)
                    + '"'
                    + "\n``` call the abstract macro ```"
                    + "\n`trackme_dsm_tracker_abstract("
                    + str(tenant_id)
                    + ", tstats)`"
                )

        elif search_mode in "raw":
            report_search = (
                str(root_constraint)
                + ' _index_earliest="'
                + index_earliest_time
                + '" _index_latest="'
                + index_latest_time
                + '"'
                + "\n| eval data_last_ingestion_lag_seen=(_indextime-_time)"
                + "\n``` intermediate calculation ```"
                + "\n| bucket _time span=1m"
                + "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
                + "count as data_eventcount, dc(host) as dcount_host by _time,"
                + str(trackme_aggreg_splitby)
                + "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
                + str(trackme_aggreg_splitby)
                + " | eval spantime=if(spantime>=(now()-300), spantime, null())"
                + "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
                + str(trackme_aggreg_splitby)
                + "\n| "
                + str(search_string_aggreg)
                + "\n``` tenant_id ```"
                + '\n| eval tenant_id="'
                + str(tenant_id)
                + '"'
                + "\n``` call the abstract macro ```"
                + "\n`trackme_dsm_tracker_abstract("
                + str(tenant_id)
                + ", raw)`"
            )

    elif tracker_type == "remote":
        if search_mode in "tstats":
            if dsm_tstats_root_breakby_include_host:
                report_search = (
                    '| splunkremotesearch account="'
                    + str(account)
                    + '"'
                    + ' search="'
                    + "| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
                    + 'count as data_eventcount where (host=* host!=\\"\\") '
                    + str(root_constraint.replace('"', '\\"'))
                    + ' _index_earliest="'
                    + index_earliest_time
                    + '" _index_latest="'
                    + index_latest_time
                    + '"'
                    + " by _time,"
                    + str(trackme_root_splitby)
                    + " span="
                    + str(dsm_tstats_root_time_span)
                    + "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
                    + "\n``` intermediate calculation ```"
                    + "\n| bucket _time span=1m"
                    + "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount, dc(host) as dcount_host by _time,"
                    + str(trackme_aggreg_splitby)
                    + "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
                    + str(trackme_aggreg_splitby)
                    + " | eval spantime=if(spantime>=(now()-300), spantime, null())"
                    + "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
                    + str(trackme_aggreg_splitby)
                    + "\n| "
                    + str(search_string_aggreg)
                    + '" earliest="'
                    + str(earliest_time)
                    + '" '
                    + 'latest="'
                    + str(latest_time)
                    + '" tenant_id="'
                    + str(tenant_id)
                    + '" component="splk-dsm"'
                    + "\n``` set tenant_id ```\n"
                    + '\n| eval tenant_id="'
                    + str(tenant_id)
                    + '"'
                    + "\n``` call the abstract macro ```"
                    + "\n`trackme_dsm_tracker_abstract("
                    + str(tenant_id)
                    + ", tstats)`"
                )

            else:
                report_search = (
                    '| splunkremotesearch account="'
                    + str(account)
                    + '"'
                    + ' search="'
                    + "| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
                    + "count as data_eventcount, dc(host) as dcount_host where "
                    + str(root_constraint.replace('"', '\\"'))
                    + ' _index_earliest="'
                    + index_earliest_time
                    + '" _index_latest="'
                    + index_latest_time
                    + '"'
                    + " by _time,"
                    + str(trackme_root_splitby)
                    + " span="
                    + str(dsm_tstats_root_time_span)
                    + "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
                    + "\n``` intermediate calculation ```"
                    + "\n| bucket _time span=1m"
                    + "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount, max(dcount_host) as dcount_host by _time,"
                    + str(trackme_aggreg_splitby)
                    + "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
                    + str(trackme_aggreg_splitby)
                    + " | eval spantime=if(spantime>=(now()-300), spantime, null())"
                    + "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
                    + str(trackme_aggreg_splitby)
                    + "\n| "
                    + str(search_string_aggreg)
                    + '" earliest="'
                    + str(earliest_time)
                    + '" '
                    + 'latest="'
                    + str(latest_time)
                    + '" tenant_id="'
                    + str(tenant_id)
                    + '" component="splk-dsm"'
                    + "\n``` set tenant_id ```\n"
                    + '\n| eval tenant_id="'
                    + str(tenant_id)
                    + '"'
                    + "\n``` call the abstract macro ```"
                    + "\n`trackme_dsm_tracker_abstract("
                    + str(tenant_id)
                    + ", tstats)`"
                )

        elif search_mode in "raw":
            report_search = (
                '| splunkremotesearch account="'
                + str(account)
                + '"'
                + ' search="'
                + "search "
                + str(root_constraint.replace('"', '\\"'))
                + ' _index_earliest="'
                + index_earliest_time
                + '" _index_latest="'
                + index_latest_time
                + '"'
                + "\n| eval data_last_ingestion_lag_seen=(_indextime-_time)"
                + "\n``` intermediate calculation ```"
                + "\n| bucket _time span=1m"
                + "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
                + "count as data_eventcount, dc(host) as dcount_host by _time,"
                + str(trackme_aggreg_splitby)
                + "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
                + str(trackme_aggreg_splitby)
                + " | eval spantime=if(spantime>=(now()-300), spantime, null())"
                + "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
                + str(trackme_aggreg_splitby)
                + "\n| "
                + str(search_string_aggreg)
                + '" earliest="'
                + str(earliest_time)
                + '" '
                + 'latest="'
                + str(latest_time)
                + '" tenant_id="'
                + str(tenant_id)
                + '" component="splk-dsm"'
                + "\n``` tenant_id ```"
                + '\n| eval tenant_id="'
                + str(tenant_id)
                + '"'
                + "\n``` call the abstract macro ```"
                + "\n`trackme_dsm_tracker_abstract("
                + str(tenant_id)
                + ", raw)`"
            )

    #
    # finalize the search
    #

    report_search = remove_leading_spaces(
        f"""\
        {report_search}
        ``` collects latest collection state into the summary index ```
        | `trackme_collect_state("current_state_tracking:splk-dsm:{tenant_id}", "object", "{tenant_id}")`

        ``` output flipping change status if changes ```
        | trackmesplkgetflipping tenant_id="{tenant_id}" object_category="splk-dsm"
        | `trackme_outputlookup(trackme_dsm_tenant_{tenant_id}, key)`
        | `trackme_mcollect(object, splk-dsm, "metric_name:trackme.splk.feeds.avg_eventcount_5m=avg_eventcount_5m, metric_name:trackme.splk.feeds.latest_eventcount_5m=latest_eventcount_5m, metric_name:trackme.splk.feeds.perc95_eventcount_5m=perc95_eventcount_5m, metric_name:trackme.splk.feeds.stdev_eventcount_5m=stdev_eventcount_5m, metric_name:trackme.splk.feeds.avg_latency_5m=avg_latency_5m, metric_name:trackme.splk.feeds.latest_latency_5m=latest_latency_5m, metric_name:trackme.splk.feeds.perc95_latency_5m=perc95_latency_5m, metric_name:trackme.splk.feeds.stdev_latency_5m=stdev_latency_5m, metric_name:trackme.splk.feeds.eventcount_4h=data_eventcount, metric_name:trackme.splk.feeds.hostcount_4h=dcount_host, metric_name:trackme.splk.feeds.lag_event_sec=data_last_lag_seen, metric_name:trackme.splk.feeds.lag_ingestion_sec=data_last_ingestion_lag_seen", "tenant_id, object_category, object", "{tenant_id}")`
        """
    )

    return report_search


# This function is used to generate the per-model metrics of the data sampling feature for splk-dsm
def trackme_splk_dsm_data_sampling_gen_metrics(
    tenant_id, metrics_idx, object_value, object_key, model_split_dict
):
    """
    Log one metrics event per data sampling model for a splk-dsm entity.

    Events are emitted through the dedicated "trackme.dsm.metrics" logger, which
    writes to a rotating file (trackme_splk_dsm_metrics.log under
    $SPLUNK_HOME/var/log/splunk) using JSONFormatter.

    :param tenant_id: value stored in the "tenant_id" field of each event
    :param metrics_idx: value stored in the "target_index" field of each event
    :param object_value: entity name, passed through decode_unicode before logging
    :param object_key: value stored in the "object_id" field of each event
    :param model_split_dict: mapping of model id to a record; each record is read
        with .get() for model_name, model_type, model_is_major, model_pct_match,
        model_count_matched and model_count_parsed
    :return: True once every record has been logged
    :raises Exception: any failure is re-raised as a generic Exception carrying
        the original error message
    """
    try:
        # dedicated logger for DSM metrics
        metrics_logger = logging.getLogger("trackme.dsm.metrics")
        metrics_logger.setLevel(logging.INFO)

        # a logger with no handler at all has never been configured
        first_setup = not metrics_logger.handlers

        # attach the rotating file handler unless one is already registered
        if not any(
            isinstance(existing, RotatingFileHandler)
            for existing in metrics_logger.handlers
        ):
            rotating_handler = RotatingFileHandler(
                f"{splunkhome}/var/log/splunk/trackme_splk_dsm_metrics.log",
                mode="a",
                maxBytes=100000000,
                backupCount=1,
            )
            rotating_handler.setFormatter(JSONFormatter())
            metrics_logger.addHandler(rotating_handler)

            # propagation to the root logger is only switched off on the first setup
            if first_setup:
                metrics_logger.propagate = False

        for model_id, model_record in model_split_dict.items():
            # indexed fields describing the entity and the model
            event_fields = {
                "target_index": metrics_idx,
                "tenant_id": tenant_id,
                "object": decode_unicode(object_value),
                "object_id": object_key,
                "object_category": "splk-dsm",
                "model_id": model_id,
                "model_name": model_record.get("model_name"),
                "model_type": model_record.get("model_type"),
                "model_is_major": model_record.get("model_is_major"),
            }

            # the metrics themselves are shipped as a JSON encoded string
            event_fields["metrics_event"] = json.dumps(
                {
                    "sampling.model_pct_match": float(
                        model_record.get("model_pct_match")
                    ),
                    "sampling.model_count_matched": int(
                        model_record.get("model_count_matched")
                    ),
                    "sampling.model_count_parsed": int(
                        model_record.get("model_count_parsed")
                    ),
                }
            )

            metrics_logger.info("Metrics - group=feeds_metrics", extra=event_fields)

        return True

    except Exception as error:
        raise Exception(str(error))


# This function is used to generate the total run_time/events_count metrics of the data sampling feature for splk-dsm
def trackme_splk_dsm_data_sampling_total_run_time_gen_metrics(
    tenant_id, metrics_idx, object_value, object_key, run_time, events_count
):
    """
    Log a single metrics event holding the data sampling run time and events
    count for a splk-dsm entity.

    The event is emitted through the dedicated "trackme.dsm.metrics" logger,
    which writes to a rotating file (trackme_splk_dsm_metrics.log under
    $SPLUNK_HOME/var/log/splunk) using JSONFormatter.

    :param tenant_id: value stored in the "tenant_id" field of the event
    :param metrics_idx: value stored in the "target_index" field of the event
    :param object_value: entity name, passed through decode_unicode before logging
    :param object_key: value stored in the "object_id" field of the event
    :param run_time: run time value, rounded to 3 decimals in the metrics payload
    :param events_count: events count, converted with int() in the metrics payload
    :return: True once the event has been logged
    :raises Exception: any failure is re-raised as a generic Exception carrying
        the original error message
    """
    try:
        # dedicated logger for DSM metrics
        metrics_logger = logging.getLogger("trackme.dsm.metrics")
        metrics_logger.setLevel(logging.INFO)

        # a logger with no handler at all has never been configured
        first_setup = not metrics_logger.handlers

        # attach the rotating file handler unless one is already registered
        if not any(
            isinstance(existing, RotatingFileHandler)
            for existing in metrics_logger.handlers
        ):
            rotating_handler = RotatingFileHandler(
                f"{splunkhome}/var/log/splunk/trackme_splk_dsm_metrics.log",
                mode="a",
                maxBytes=100000000,
                backupCount=1,
            )
            rotating_handler.setFormatter(JSONFormatter())
            metrics_logger.addHandler(rotating_handler)

            # propagation to the root logger is only switched off on the first setup
            if first_setup:
                metrics_logger.propagate = False

        # indexed fields describing the entity
        event_fields = {
            "target_index": metrics_idx,
            "tenant_id": tenant_id,
            "object": decode_unicode(object_value),
            "object_id": object_key,
            "object_category": "splk-dsm",
        }

        # the metrics themselves are shipped as a JSON encoded string
        event_fields["metrics_event"] = json.dumps(
            {
                "sampling.run_time": round(run_time, 3),
                "sampling.events_count": int(events_count),
            }
        )

        metrics_logger.info("Metrics - group=feeds_metrics", extra=event_fields)

        return True

    except Exception as error:
        raise Exception(str(error))


# This function is used to generate metrics for splk-dhm
def trackme_splk_dhm_gen_metrics(tenant_id, metrics_idx, records):
    """
    Log the splk-dhm metrics events found in the "metrics_dict" of each record.

    Events are emitted through the dedicated "trackme.dhm.metrics" logger, which
    writes to a rotating file (trackme_splk_dhm_metrics.log under
    $SPLUNK_HOME/var/log/splunk) using JSONFormatter.

    :param tenant_id: value stored in the "tenant_id" field of each event
    :param metrics_idx: value stored in the "target_index" field of each event
    :param records: iterable of records read with .get(); a record with a missing
        or empty "metrics_dict" is skipped, otherwise one event is logged per
        entry of that dictionary (idx, st, last_eventcount, last_ingest_lag and
        last_event_lag are read from each entry)
    :return: True once every record has been processed
    :raises Exception: any failure is re-raised as a generic Exception carrying
        the original error message
    """
    try:
        # dedicated logger for DHM metrics
        metrics_logger = logging.getLogger("trackme.dhm.metrics")
        metrics_logger.setLevel(logging.INFO)

        # a logger with no handler at all has never been configured
        first_setup = not metrics_logger.handlers

        # attach the rotating file handler unless one is already registered
        if not any(
            isinstance(existing, RotatingFileHandler)
            for existing in metrics_logger.handlers
        ):
            rotating_handler = RotatingFileHandler(
                f"{splunkhome}/var/log/splunk/trackme_splk_dhm_metrics.log",
                mode="a",
                maxBytes=100000000,
                backupCount=1,
            )
            rotating_handler.setFormatter(JSONFormatter())
            metrics_logger.addHandler(rotating_handler)

            # propagation to the root logger is only switched off on the first setup
            if first_setup:
                metrics_logger.propagate = False

        for entity_record in records:
            entity_metrics = entity_record.get("metrics_dict")

            # nothing to report for this entity
            if not entity_metrics:
                continue

            for _metric_entity, metric_values in entity_metrics.items():
                # indexed fields describing the entity and the metric entry
                event_fields = {
                    "target_index": metrics_idx,
                    "tenant_id": tenant_id,
                    "object": decode_unicode(entity_record.get("object")),
                    "object_id": entity_record.get("object_id"),
                    "alias": entity_record.get("alias"),
                    "object_category": entity_record.get("object_category"),
                    "idx": metric_values.get("idx"),
                    "st": metric_values.get("st"),
                }

                # the metrics themselves are shipped as a JSON encoded string
                event_fields["metrics_event"] = json.dumps(
                    {
                        "last_eventcount": float(
                            metric_values.get("last_eventcount")
                        ),
                        "last_ingest_lag": float(
                            metric_values.get("last_ingest_lag")
                        ),
                        "last_event_lag": float(
                            metric_values.get("last_event_lag")
                        ),
                    }
                )

                metrics_logger.info(
                    "Metrics - group=feeds_metrics", extra=event_fields
                )

        return True

    except Exception as error:
        raise Exception(str(error))


# This function is used to generate metrics for splk-mhm
def trackme_splk_mhm_gen_metrics(tenant_id, metrics_idx, records):
    """Write splk-mhm metrics events to the dedicated MHM metrics log file.

    One INFO log record is emitted on the "trackme.mhm.metrics" logger for
    every entry found in the "metrics_dict" of each record. Records with a
    missing or empty "metrics_dict" are ignored.

    Args:
        tenant_id: Value emitted as the "tenant_id" field of every event.
        metrics_idx: Value emitted as the "target_index" field of every event.
        records: Iterable of dict-like records. The fields read from each
            record are "object", "object_id", "alias", "object_category" and
            "metrics_dict". Each value of "metrics_dict" must expose
            "metric_category" and a "last_metric_lag" convertible to float.

    Returns:
        True once all records have been processed.

    Raises:
        Exception: Any failure (handler setup, missing or non numeric
            "last_metric_lag", ...) is re-raised as a generic Exception
            carrying the original message, with the original exception
            chained as its cause.
    """
    try:
        # Create a dedicated logger for MHM metrics
        mhm_logger = logging.getLogger("trackme.mhm.metrics")
        mhm_logger.setLevel(logging.INFO)

        # Only attach a RotatingFileHandler if the logger does not have one
        # yet: this covers both a logger without any handler and a logger
        # holding only handlers of other types.
        if not any(
            isinstance(handler, RotatingFileHandler)
            for handler in mhm_logger.handlers
        ):
            filehandler = RotatingFileHandler(
                f"{splunkhome}/var/log/splunk/trackme_splk_mhm_metrics.log",
                mode="a",
                maxBytes=100000000,
                backupCount=1,
            )
            filehandler.setFormatter(JSONFormatter())
            mhm_logger.addHandler(filehandler)

        # Prevent propagation to root logger. This is enforced on every call
        # so that metrics events never reach the root logger, even when the
        # logger already had handlers before this function ran.
        mhm_logger.propagate = False

        for record in records:
            metrics_dict = record.get("metrics_dict", None)

            # nothing to generate for this record
            if not metrics_dict:
                continue

            # the decoded object is the same for all metrics of the record
            object_value = decode_unicode(record.get("object"))

            for metrics_event in metrics_dict.values():
                mhm_logger.info(
                    "Metrics - group=feeds_metrics",
                    extra={
                        "target_index": metrics_idx,
                        "tenant_id": tenant_id,
                        "object": object_value,
                        "object_id": record.get("object_id"),
                        "alias": record.get("alias"),
                        "object_category": record.get("object_category"),
                        "metric_category": metrics_event.get("metric_category"),
                        "metrics_event": json.dumps(
                            {
                                "last_metric_lag": float(
                                    metrics_event.get("last_metric_lag")
                                ),
                            }
                        ),
                    },
                )

        return True

    except Exception as e:
        # keep the generic exception type and message callers rely on, and
        # chain the original exception so its traceback is preserved
        raise Exception(str(e)) from e
