"""##################################### TERMS OF USE ###########################################
# The following code is provided for demonstration purpose only, and should not              #
# be used without independent verification. Recorded Future makes no representations         #
# or warranties, express, implied, statutory, or otherwise, regarding any aspect of          #
# this code or of the information it may retrieve, and provides it both strictly “as-is”     #
# and without assuming responsibility for any information it may retrieve. Recorded Future   #
# shall not be liable for, and you assume all risk of using, the foregoing. By using this    #
# code, Customer represents that it is solely responsible for having all necessary licenses, #
# permissions, rights, and/or consents to connect to third party APIs, and that it is solely #
# responsible for having all necessary licenses, permissions, rights, and/or consents to     #
# any data accessed from any third party API.                                                #
##############################################################################################
"""

import json
import logging
import os
from datetime import datetime, timedelta
from json.decoder import JSONDecodeError

from elasticsearch import Elasticsearch
from elasticsearch.exceptions import (
    AuthenticationException,
    AuthorizationException,
    ConnectionError,
    NotFoundError,
    SSLError,
)
from psengine.collective_insights import RFInsight
from psengine.config import Config, ConfigError

from .constants import (
    ALERTS_INDEX,
    FIELD_MAP,
    HOSTNAME,
    LOOKBACK,
    MAX_RESULTS,
    TIMEOUT,
    TIMESTAMP,
    TIMESTAMP_FORMAT,
)
from .errors import CategoryParseError, ElasticClientError, RFScriptError, WriteFileError
from .utils import get_category

# Relative path of the file that keeps the latest alert timestamp between runs;
# default for both `ESCI._set_lookback` (read) and `ESCI.write_latest_timestamp` (write).
TIMESTAMP_FILE = os.path.join('config', 'latest_timestamp.txt')


class ESCI:
    """Elasticsearch for Collective Insights class"""

    def __init__(
        self,
        config: Config,
        elastic_user: str,
        elastic_pass: str,
        lookback: int = LOOKBACK,
        timestamp_field: str = TIMESTAMP,
        hostname: str = HOSTNAME,
    ):
        """Validates the settings, builds the Elasticsearch client and resolves the alerts index

        Args:
            config (Config): Script configuration, only `config.settings` is read
            elastic_user (str): Elastic username, None when not supplied
            elastic_pass (str): Elastic password, None when not supplied
            lookback (int): Lookback hours, used when no saved timestamp can be read
            timestamp_field (str): Alert field used for the time range filter and sorting
            hostname (str): Alert field requested to populate `ioc_source_type`

        Raises:
            RFScriptError: When the [elasticsearch] settings are invalid or the
                Elastic version cannot be determined
            ConfigError: When a host port is invalid
            ElasticClientError: When the Elastic instance cannot be reached
        """
        self.log = logging.getLogger('psengine')
        self.settings = config.settings
        self.elastic_user = elastic_user
        self.elastic_password = elastic_pass
        # needs self.log, so it must run after the logger is set
        self.lookback = self._set_lookback(lookback)
        self.timestamp_field = timestamp_field
        self.hostname = hostname
        # defaults to execution time; replaced in format_ci_payload when an alert timestamp
        # is parsed
        # NOTE(review): datetime.now() is naive local time - confirm it matches the timezone
        # of the Elastic timestamps
        self.latest_timestamp = datetime.now().strftime(TIMESTAMP_FORMAT)
        # the steps below depend on each other and must keep this order:
        # warn -> validate settings -> build client kwargs -> client -> server info -> index
        self._issue_auth_warnings()
        self._validate_elasticsearch_settings()
        self.elastic_auth = self._validate_and_set_auth()
        self.es = self._create_es_client()
        self.es_info = self._fetch_server_info()
        self.es_index = self._set_elastic_index()

    def _set_lookback(self, lookback: int, timestamp_file: str = TIMESTAMP_FILE) -> str:
        """Reads most recent timestamp from file if it exists
        Otherwise sets lookback from script argument

        Args:
            lookback (int): Lookback hours provided from script input

        Returns:
            lookback_str (str): time period used to query Elastic alerts
        """
        if os.path.exists(timestamp_file):
            self.log.info(f'Reading latest alert timestamp from {timestamp_file}')
            try:
                with open(timestamp_file, 'r') as f:
                    latest_timestamp = f.read()
                timestamp = datetime.strptime(latest_timestamp, TIMESTAMP_FORMAT)
                updated_timestamp = timestamp + timedelta(milliseconds=1)
                lookback_str = updated_timestamp.strftime(TIMESTAMP_FORMAT)
                self.log.info(f'Using latest alert timestamp from file: {lookback_str}')
            except OSError as err:
                self.log.error(f'Error reading latest timestamp from file: {err}')
                self.log.info(f"Using '{lookback}' hour(s) instead")
                lookback_str = f'now-{lookback}h'
        else:
            self.log.info(
                f"File '{timestamp_file}' does not exist. Using '{lookback}' hour(s) instead"
            )
            lookback_str = f'now-{lookback}h'
        return lookback_str

    def write_latest_timestamp(self, timestamp_file: str = TIMESTAMP_FILE):
        """Writes timestamp from the latest Elastic alert to file if exists,
        othewise uses the script execution time

        Args:
            latest_timestamp (str): Timestamp from latest alert or execution time if no alerts
        """
        self.log.info(f"Writing latest timestamp to '{timestamp_file}'")
        try:
            with open(timestamp_file, 'w') as f:
                f.write(self.latest_timestamp)
        except OSError as err:
            raise WriteFileError(str(err))

    def _issue_auth_warnings(self):
        """Parses script settings and validates Elasticsearch client parameters

        Args:
            settings (dict): Configuration from settings.ini
        """
        es_api_key_id = self.settings.get('elasticsearch', {}).get('es_api_key_id', None)
        es_api_key = self.settings.get('elasticsearch', {}).get('es_api_key', None)

        if es_api_key_id is None and es_api_key is not None:
            self.log.warning(
                'Elasticsearch API key set, but API key ID not set (check settings.ini)'
            )
        if es_api_key_id is not None and es_api_key is None:
            self.log.warning(
                'Elasticsearch API key ID set, but API key not set (check settings.ini)'
            )
        if self.elastic_user is None and self.elastic_password is not None:
            self.log.warning(
                'Elasticsearch user set, but password not set. Set RF_ELASTIC_USER env var'
            )
        if self.elastic_user is not None and self.elastic_password is None:
            self.log.warning(
                'Elasticsearch password set, but user not set. Set RF_ELASTIC_PASSWORD env var'
            )
        if (es_api_key_id is not None or es_api_key is not None) and (
            self.elastic_user is not None or self.elastic_password is not None
        ):
            self.log.warning(
                'Elasticsearch http_auth and api_key variables set. Use one type of authentication'
            )
        if (
            es_api_key_id is None
            and es_api_key is None
            and self.elastic_user is None
            and self.elastic_password is None
        ):
            self.log.warning('No Elasticsearch credentials supplied')

    def _validate_elasticsearch_settings(self):
        """Validates script settings

        Args:
            settings (dict): Configuration from settings.ini

        Raises:
            RFScriptError: When arguments are invalid or not found
        """
        if self.settings.get('elasticsearch') is None:
            raise RFScriptError('[elasticsearch] stanza must be set in settings.ini')

        if 'cloud_id' not in self.settings.get(
            'elasticsearch', {}
        ) and 'hosts' not in self.settings.get('elasticsearch', {}):
            raise RFScriptError('One of the following parameters must be used: hosts or cloud_id')

        if 'cloud_id' in self.settings.get('elasticsearch', {}) and 'hosts' in self.settings.get(
            'elasticsearch', {}
        ):
            raise RFScriptError(
                'Only one of the following parameters can be used at a time: hosts or cloud_id'
            )

        self.log.info('Elasticsearch settings:')
        if 'cloud_id' in self.settings.get('elasticsearch', {}):
            self.log.info(
                f'  Cloud instance: {self.settings.get("elasticsearch", {}).get("cloud_id")}'
            )
        elif 'hosts' not in self.settings.get('elasticsearch', {}):
            raise RFScriptError('Please set one of cloud_id or hosts in [elasticsearch] stanza')
        else:
            hosts = self.settings.get('elasticsearch', {}).get('hosts')
            self.log.info(f'  Hosts: {hosts}')
            if not isinstance(hosts, list) and hosts.startswith('[') and hosts.endswith(']'):
                try:
                    hosts = json.loads(hosts)
                except JSONDecodeError:
                    raise RFScriptError('Invalid Hosts input')

            if isinstance(hosts, list):
                for host in hosts:
                    self._validate_port(host_str=host)
            else:
                self._validate_port(host_str=hosts)

    def _validate_port(self, host_str: str):
        port = host_str.split(':')[-1]
        host = host_str.split(':')[0]
        try:
            port = int(port)
        except ValueError:
            raise ConfigError(f"Elasticsearch host '{host}' port value is not an integer: '{port}'")

        if not (port > 0 and port <= 65535):
            raise ConfigError(
                f"Elasticsearch host '{host}' port value is not '0 < port < 65535'. Vaue: '{port}'"
            )

    def _validate_and_set_auth(self) -> dict:
        """Parses script settings and sets Elasticsearch client parameters

        Args:
            settings (dict): Elasticsearch configuration from settings.ini
            elastic_user (str): Elastic username
            elastic_password (str): Elastic password

        Raises:
            RFScriptError: When Certificate Auth is enabled but files are not found

        Returns:
            elastic_config (dict): Elasticsearch client kwargs
        """
        elastic_config = {}
        if self.settings.get('elasticsearch', {}).get('cloud_id') is not None:
            elastic_config['cloud_id'] = self.settings.get('elasticsearch', {}).get('cloud_id')
        if self.settings.get('elasticsearch', {}).get('hosts') is not None:
            elastic_config['hosts'] = self.settings.get('elasticsearch', {}).get('hosts')

        if self.elastic_user is not None and self.elastic_password is not None:
            elastic_config['http_auth'] = (self.elastic_user, self.elastic_password)

        es_api_key_id = self.settings.get('elasticsearch', {}).get('es_api_key_id', None)
        es_api_key = self.settings.get('elasticsearch', {}).get('es_api_key', None)

        if es_api_key_id is not None and es_api_key is not None:
            elastic_config['api_key'] = (es_api_key_id, es_api_key)

        if 'ca_certs' in self.settings.get('elasticsearch', {}):
            ca_certs = self.settings.get('elasticsearch', {}).get('ca_certs')
            if not os.path.exists(ca_certs):
                raise RFScriptError(f"ca_certs file '{ca_certs}' not found")
            elastic_config['ca_certs'] = ca_certs

        if 'client_cert' in self.settings.get('elasticsearch', {}):
            client_cert = self.settings.get('elasticsearch', {}).get('client_cert')
            if not os.path.exists(client_cert):
                raise RFScriptError(f"client_cert file '{client_cert}' not found")
            elastic_config['client_cert'] = client_cert

        if 'client_key' in self.settings.get('elasticsearch', {}):
            client_key = self.settings.get('elasticsearch', {}).get('client_key')
            if not os.path.exists(client_key):
                raise RFScriptError(f"client_key file '{client_key}' not found")
            elastic_config['client_key'] = client_key

        if 'use_ssl' in self.settings.get('elasticsearch', {}):
            elastic_config['use_ssl'] = (
                self.settings.get('elasticsearch', {}).get('use_ssl').lower() == 'true'
            )
        if 'verify_certs' in self.settings.get('elasticsearch', {}):
            elastic_config['verify_certs'] = (
                self.settings.get('elasticsearch', {}).get('verify_certs').lower() == 'true'
            )

        return elastic_config

    def _create_es_client(self) -> Elasticsearch:
        """Builds the Elasticsearch client from `self.elastic_auth`

        The request timeout is added to `self.elastic_auth` and every setting is logged,
        with the credential values masked

        Returns:
            elastic_client (Elasticsearch): Elasticsearch client object
        """
        secret_settings = ('http_auth', 'api_key')
        self.elastic_auth['timeout'] = TIMEOUT
        self.log.info('Creating Elasticsearch object with settings:')
        for name in self.elastic_auth:
            # never write credentials to the log
            shown = '********' if name in secret_settings else self.elastic_auth[name]
            self.log.info(f'  {name}: {shown}')
        return Elasticsearch(**self.elastic_auth)

    def _fetch_server_info(self) -> dict:
        """Fetch Elastic server information

        Args:
            client (Elasticsearch): Elasticsearch python client

        Raises:
            ElasticClientError: Error connecting to the Elastic instance

        Returns:
            server_info (dict): Server information
        """
        try:
            self.log.info('Fetching Elastic server information')
            server_info = self.es.info()
            self.log.debug(f'Server information: {server_info}')
        except (AuthenticationException, ConnectionError, SSLError) as err:
            raise ElasticClientError(f'Failed to connect to Elastic instance: {err}')
        return server_info

    def _set_elastic_index(self) -> str:
        if index := self.settings.get('elasticsearch', {}).get('es_alerts_index'):
            self.log.info(f'Using settings.ini supplied elastic index: {index}')
            return index
        if elastic_version := self.es_info.get('version', {}).get('number'):
            major_version = int(elastic_version[0])
        else:
            raise RFScriptError(
                f'Unable to parse Elastic major version from the system information: {self.es_info}'
            )
        index = ALERTS_INDEX.get(major_version)
        self.log.info(
            f'Elasticsearch major verison and corresponding index: [{major_version}:{index}]'
        )
        return index

    def fetch_elastic_alerts(
        self,
        size: int = MAX_RESULTS,
    ) -> dict:
        """Fetches Elastic alerts that have been generated since lookback time period

        Args:
            size (int): Size limit for query response (default | max = 10,000)

        Raises:
            ElasticClientError: When there is an issue running the alerts search

        Returns:
            response (json): json response from Elastic Search API
        """
        query = {
            'bool': {
                'must': [{'match': {'kibana.alert.rule.type': 'threat_match'}}],
                'filter': [{'range': {self.timestamp_field: {'gte': self.lookback}}}],
            }
        }
        fields = [
            'kibana.alert.rule.name',
            'threat.enrichments.indicator.type',
            'threat.enrichments.matched.atomic',
            'threat.tactic.id',
            {'field': self.hostname, 'include_unmapped': True},
            {'field': self.timestamp_field, 'include_unmapped': True},
        ]
        sort = [{self.timestamp_field: {'order': 'desc'}}]
        body = {'query': query, 'fields': fields, 'sort': sort, '_source': False}
        self.log.debug(f'Searching Elastic for alerts with following body: {body}')
        try:
            elastic_response = self.es.search(index=self.es_index, body=body, size=size)
        except (
            AuthenticationException,
            AuthorizationException,
            ConnectionError,
            NotFoundError,
        ) as err:
            raise ElasticClientError(err)
        return elastic_response

    def format_ci_payload(self, elastic_alerts: list) -> list:
        """Parses elastic alerts data into Recorded Future Collective Insights format
        Writes latest Elastic alert timestamp to file

        Args:
            elastic_alerts (list): Elastic documents

        Returns:
            collective_insights_data (list): list of RFInsight objects to send to
                Recorded Future Collective Insights
        """
        self.log.info('Preparing Elasticsearch alerts for Collective Insights submission')
        try:
            alert_timestamp = elastic_alerts[0]['fields'][self.timestamp_field][0]
            self.latest_timestamp = alert_timestamp
        except (KeyError, IndexError):
            self.log.warning(
                'Did not parse latest timestamp from document, defaulting to execution time'
            )

        collective_insights_data = []
        for alert in elastic_alerts:
            try:
                # standard alert fields
                alert_id = alert['_id']
                alert_timestamp = alert['fields'][self.timestamp_field][0]
                detection_name = alert['fields']['kibana.alert.rule.name'][0]
                ioc_source_type = alert['fields'].get(self.hostname)
                mitre_codes = alert['fields'].get('threat.tactic.id')

                # multiple indicators in threat object, dedup
                enrichments = alert['fields']['threat.enrichments']
                unique_iocs = list({v['matched.atomic'][0]: v for v in enrichments}.values())
                for indicator in unique_iocs:
                    ioc_value = indicator['matched.atomic'][0]
                    es_ioc_type = indicator.get('indicator.type')
                    ioc_type = (
                        FIELD_MAP.get(es_ioc_type[0])
                        if es_ioc_type is not None
                        else get_category(value=ioc_value)
                    )
                    if ioc_type is None:
                        self.log.info(
                            (
                                'Not able to parse indicator type for '
                                f'Collective Insights: {ioc_value}. Skipping.'
                            )
                        )
                        continue

                    insight_data = {
                        'ioc_value': ioc_value,
                        'ioc_type': ioc_type,
                        'timestamp': alert_timestamp,
                        'incident_id': alert_id,
                        'incident_name': 'Elastic Threat Detection',
                        'incident_type': 'elastic-threat-detection',
                        'detection_name': detection_name,
                        'detection_type': 'correlation',
                    }

                    if ioc_source_type is not None:
                        insight_data['ioc_source_type'] = ioc_source_type[0]
                    if mitre_codes is not None:
                        insight_data['mitre_codes'] = mitre_codes

                    rf_insight = RFInsight(**insight_data)
                    collective_insights_data.append(rf_insight)
            except (KeyError, IndexError) as err:
                # don't kill if one bad document
                self.log.error(
                    f'Not able to parse Elastic document for Collective Insights data: {err}'
                )
            except CategoryParseError as err:
                self.log.error(err)

        return collective_insights_data
