"""
Last Updated: 10-31-2022
Version: 2.0
Author: Marty Vo
############################## TERMS OF USE ####################################
# The following code is provided for demonstration purposes only, and should   #
# not be used without independent verification. Recorded Future makes no       #
# representations or warranties, express, implied, statutory, or otherwise,    #
# regarding this code, and provides it strictly "as-is".                       #
# Recorded Future shall not be liable for, and you assume all risk of          #
# using the foregoing.                                                         #
################################################################################

Writes a JSON list of exposed credentials to a timestamped file. Each element is
produced by the Recorded Future Identity API /identity/credentials/lookup endpoint.

Files older than RETENTION_TIME_SECONDS (one week) are deleted when this script is run.

Run with -h for usage.

To pull exposed credentials for domain(s) downloaded within the past 7 days:
    python3 recordedfuture_identities.py -t [API-KEY] -d [DOMAIN ...] -ld 7

Output files are named rfidentities_[timestamp].json, where [timestamp] is an ISO 8601
timestamp with ':' replaced by '_'. Each output file contains a JSON array of the
credential records returned by the Recorded Future API for exposed identities. The
script skips credentials that are already present in other rfidentities_*.json files
written to disk.
"""
import os
import re
import sys
import ssl
import time
import json
import argparse
import datetime
import logging
import logging.handlers
import splunklib.client
from urllib import request, error

# the following try/except is for python 2/3 compatibility
try:
    from urllib.parse import urlencode
    from urllib.request import urlopen, Request, ProxyHandler, HTTPSHandler, build_opener, install_opener
    from urllib.error import URLError, HTTPError
except:
    from urllib import urlencode
    from urllib2 import urlopen, Request, URLError, HTTPError, ProxyHandler, HTTPSHandler, build_opener, install_opener


# Age, in seconds, after which an output file is deleted: 7 days x 24 h x 60 min x 60 s.
# Keep this a whole number of days.
RETENTION_TIME_SECONDS = 7 * 24 * 60 * 60

class PasswordNotFoundError(Exception):
    """Raised when no Recorded Future API key is found in Splunk storage passwords"""


class RFClient():
    """
    Makes HTTPS requests to the Recorded Future API and validates results

    Every request is a JSON POST routed through the proxy in self.proxy_settings.
    When verify is False, certificate and hostname checks are turned off.
    """

    # Layout of every date filter sent to the API; the time of day is fixed to midnight
    DATE_FORMAT = "%Y-%m-%dT00:00:00Z"
    # Number of identities requested per page from the search endpoint
    SEARCH_PAGE_SIZE = 500

    def __init__(self, token, logger, verify):
        """
        Args:
            token (str): Recorded Future API token, sent as the X-RFToken header
            logger (logging.Logger): Logger for progress and error messages
            verify (bool): Pass False to disable SSL certificate verification
        """
        self.headers = {
            'X-RFToken': token,
            'User-Agent': "PS-Splunk-Identity/1.0.0",
            'Content-Type': 'application/json',
            'accept': 'application/json'
        }
        self.logger = logger
        self.verify = verify
        self.URL_BASE = 'https://api.recordedfuture.com/identity/'
        self.URL_CREDENTIALS_SEARCH = self.URL_BASE + 'credentials/search'
        self.URL_CREDENTIALS_LOOKUP = self.URL_BASE + 'credentials/lookup'
        # NOTE(review): the proxy address is hard-coded and applied to every request --
        # confirm it is the intended value for the target deployment
        self.proxy_settings = {
            "http": "https://0.0.0.0:8888",
            "https": "https://0.0.0.0:8888"
        }

    def _days_ago(self, days):
        """Returns the date filter string for the day that is `days` days before now"""
        return (datetime.datetime.now() - datetime.timedelta(days)).strftime(self.DATE_FORMAT)

    def _build_filter(self, lookback_date, password_properties, breach_name, breach_lbd,
                      dump_name, dump_lbd, username_properties, authorization_protocols,
                      authorization_technologies):
        """
        Builds the "filter" object shared by the search and lookup payloads

        Breach and dump restrictions are only added when a name is given, because empty
        strings or lists for those keys cause errors in requests to the API. Their date
        falls back to lookback_date when no dedicated lookback is supplied.
        """
        query_filter = {
            "properties": ([] if password_properties is None else password_properties),
            "latest_downloaded_gte": lookback_date,
            "username_properties": ([] if username_properties is None else username_properties),
            "authorization_technologies": ([] if authorization_technologies is None else authorization_technologies),
            "authorization_protocols": ([] if authorization_protocols is None else authorization_protocols)
        }
        if breach_name:
            query_filter["breach_properties"] = {
                "name": breach_name,
                "date": lookback_date if breach_lbd is None else self._days_ago(breach_lbd)
            }
        if dump_name:
            query_filter["dump_properties"] = {
                "name": dump_name,
                "date": lookback_date if dump_lbd is None else self._days_ago(dump_lbd)
            }
        return query_filter

    def _build_request(self, url, payload):
        """Builds the POST request carrying `payload` as a JSON body"""
        # urllib's Request takes no SSL context; the context belongs to the HTTPSHandler
        return Request(url, data=json.dumps(payload).encode(), headers=self.headers)

    def _build_opener(self):
        """Builds (and installs) an opener using the proxy and, if requested, no SSL verification"""
        handlers = [ProxyHandler(self.proxy_settings)]
        # Only an explicit False disables certificate verification
        if self.verify is False:
            handlers.append(HTTPSHandler(context=create_ssl_ctx()))
        opener = build_opener(*handlers)
        install_opener(opener)
        return opener

    def _post_json(self, url, payload):
        """POSTs `payload` to `url` and returns the decoded JSON response body"""
        response = self._build_opener().open(self._build_request(url, payload))
        try:
            return json.loads(response.read().decode('utf-8'))
        finally:
            response.close()

    def make_search_request(self, domains, lookback_days, domain_types, password_properties,
                            exfiltration_days, breach_name, breach_lbd, dump_name, dump_lbd,
                            username_properties, authorization_protocols, authorization_technologies):
        """
        Makes a request to the Identity API search endpoint to pull back a list of identities.
        Pages are requested until an empty page is returned. Any request failure is logged
        and terminates the script with exit status 1.

        Args:
            domains (list): Domains to search identities for
            lookback_days (int): Number of days back to begin the search
            domain_types (str): Type of identities to retrieve
            password_properties ([str]): Required password properties for returned credentials
            exfiltration_days (int): Number of days since exfiltration
            breach_name (str): Name of breach to restrict identities to
            breach_lbd (int): Number of days back from breach
            dump_name (str): Name of dump to restrict identities to
            dump_lbd (int): Number of days back from dump
            username_properties (list): Username properties of the credentials
            authorization_protocols (list): Authorization protocols of the credentials
            authorization_technologies (list): Authorization technologies of the credentials

        Returns:
            [str, dict]: List of identities which contain email strings or authorization JSONs
        """
        lookback_date = self._days_ago(lookback_days)
        search_query = {
            "domains": domains,
            "filter": self._build_filter(lookback_date, password_properties, breach_name, breach_lbd,
                                         dump_name, dump_lbd, username_properties,
                                         authorization_protocols, authorization_technologies),
            "limit": self.SEARCH_PAGE_SIZE
        }
        # These parameters must be specified after as empty strings or lists will cause errors in requests to the API
        if domain_types:
            search_query["domain_types"] = [domain_types]
        if exfiltration_days:
            search_query["filter"]["exfiltration_date_gte"] = self._days_ago(exfiltration_days)

        search_query_log_str_1 = "Generated search query with parameters: domains={}, domain_types={}, lookback_days={}, properties={}, ".format(
            domains, domain_types, lookback_days, password_properties)
        search_query_log_str_2 = "exfiltration_days={}, breach={}, dump={}, ".format(
            exfiltration_days, breach_name, dump_name)
        search_query_log_str_3 = "username_properties={}, authorization_protocols={}, authorization_technologies={}".format(
            username_properties, authorization_protocols, authorization_technologies)
        self.logger.info(search_query_log_str_1 + search_query_log_str_2 + search_query_log_str_3)
        self.logger.info("Calling {} to find subjects of {} in recently exposed cred leaks...".format(
            self.URL_CREDENTIALS_SEARCH, str(domains)))

        identities = []
        try:
            while True:
                content = self._post_json(self.URL_CREDENTIALS_SEARCH, search_query)
                page_of_creds = content["identities"]
                if not page_of_creds:
                    break
                identities.extend(page_of_creds)
                next_offset = content.get("next_offset")
                if next_offset is None:
                    # Without a cursor the same page would be requested again, so treat
                    # this page as the last one
                    break
                search_query["offset"] = next_offset
        except error.URLError as url_error:
            # error.HTTPError is a subclass of error.URLError and is reported here as well
            self.logger.error("Search request failed due to: {}".format(url_error))
            sys.exit(1)
        except Exception as e:
            # Exception (not BaseException) so Ctrl-C and SystemExit are not masked
            self.logger.error("Search request has failed due to: {}".format(e))
            sys.exit(1)

        return identities

    def make_lookup_request(self, batch_num, identities, lookback_days, password_properties,
                            breach_name, breach_lbd, dump_name, dump_lbd, username_properties,
                            authorization_protocols, authorization_technologies):
        """
        Makes a request to the Identity API lookup endpoint to pull back information about exposed credentials.
        There is a limit of 500 credentials per lookup. Request failures are logged and
        result in an empty list so that the caller can retry the batch.

        Args:
            batch_num (int): Batch number of identities to lookup
            identities (list): Identities to look up; dict entries are sent as login
                               subjects, all other entries as email subjects
            lookback_days (int): Number of days back to begin the search
            password_properties ([str]): Required password properties for returned credentials
            breach_name (str): Name of breach to restrict identities to
            breach_lbd (int): Number of days back from breach
            dump_name (str): Name of dump to restrict identities to
            dump_lbd (int): Number of days back from dump
            username_properties (list): Username properties of the credentials
            authorization_protocols (list): Authorization protocols of the credentials
            authorization_technologies (list): Authorization technologies of the credentials
        Returns:
            list: An array of JSON containing information about the exposed credentials
        """
        # Check here for any identities before performing a lookup
        if not identities:
            return identities

        lookback_date = self._days_ago(lookback_days)
        login_subjects = [subject for subject in identities if isinstance(subject, dict)]
        email_subjects = [subject for subject in identities if not isinstance(subject, dict)]

        subjects_query = {
            "subjects": email_subjects,
            "subjects_login": login_subjects,
            "filter": self._build_filter(lookback_date, password_properties, breach_name, breach_lbd,
                                         dump_name, dump_lbd, username_properties,
                                         authorization_protocols, authorization_technologies)
        }

        self.logger.info("Batch {}: Calling /lookup on {} identities".format(batch_num, len(identities)))
        credentials = []
        try:
            content = self._post_json(self.URL_CREDENTIALS_LOOKUP, subjects_query)
            credentials += content["identities"]
        except error.HTTPError as http_error:
            self.logger.error("Lookup request failed during batch lookup {}: {}".format(batch_num, http_error))
        except error.URLError as url_error:
            self.logger.error("Lookup request failed batch lookup {}: {}".format(batch_num, url_error))
        except Exception as e:
            # Exception (not BaseException) so Ctrl-C is not swallowed into the retry loop
            self.logger.error("Lookup request failed batch lookup: {}: {}".format(batch_num, e))

        return credentials


class IdentitiesManager():
    """
    Requests Recorded Future Identities and creates identities details JSON
    """

    # Matches complete "rfidentities_<timestamp>.json" names only; the dot is escaped and
    # the end is anchored so names such as "rfidentities_x.json.bak" are not picked up
    IDENTITY_FILE_REGEX = re.compile(r'rfidentities_(.*)\.json$')
    # Maximum number of identities sent in one lookup request
    LOOKUP_BATCH_SIZE = 500
    # Number of times a lookup batch is attempted before it is given up
    MAX_LOOKUP_ATTEMPTS = 3

    def __init__(self, token, domains, lookback_days, domain_types, password_properties,
                 exfiltration_days, breach_name, breach_lbd, dump_name, dump_lbd,
                 username_properties, authorization_protocols, authorization_technologies,
                 dir, verify, logger):
        """
        Stores the search parameters and creates the API client

        Args:
            token (str): Recorded Future API token
            domains (list): Domains to search identities for
            lookback_days (int): Number of days back to begin the search
            domain_types (str): Type of identities to retrieve
            password_properties ([str]): Required password properties for returned credentials
            exfiltration_days (int): Number of days since exfiltration
            breach_name (str): Name of breach to restrict identities to
            breach_lbd (int): Number of days back from breach
            dump_name (str): Name of dump to restrict identities to
            dump_lbd (int): Number of days back from dump
            username_properties (list): Username properties of the credentials
            authorization_protocols (list): Authorization protocols of the credentials
            authorization_technologies (list): Authorization technologies of the credentials
            dir (str): Directory the rfidentities_*.json files are read from and written to
            verify (bool): False disables SSL certificate verification
            logger (logging.Logger): Logger for progress and error messages
        """
        self.client = RFClient(token, logger, verify)
        self.domains = domains
        self.lookback_days = lookback_days
        self.domain_types = domain_types
        self.password_properties = password_properties
        self.exfiltration_days = exfiltration_days
        self.breach_name = breach_name
        self.breach_lbd = breach_lbd
        self.dump_name = dump_name
        self.dump_lbd = dump_lbd
        self.username_properties = username_properties
        self.authorization_protocols = authorization_protocols
        self.authorization_technologies = authorization_technologies
        self.logger = logger
        self.write_dir = dir
        self.identity_file_regex = self.IDENTITY_FILE_REGEX

    @staticmethod
    def _secret_hashes(credential):
        """Returns the hash strings listed under a credential's exposed secret"""
        return [entry["hash"] for entry in credential["exposed_secret"]["hashes"] if "hash" in entry]

    def _list_identity_files(self):
        """
        Returns a list of all rfidentities*.json filenames in the write directory

        Arguments:
            None

        Returns:
            [str]: A list of string filenames
        """
        return [fname for fname in os.listdir(self.write_dir)
                if self.identity_file_regex.match(fname) is not None]

    def _remove_if_stale(self, fname):
        """
        Removes the supplied file if it is older than RETENTION_TIME_SECONDS

        Args:
            fname (str): File name to potentially remove

        Returns:
            (bool): True if the file was removed; False if it is not stale yet or
                    could not be removed

        Side Effects:
            Deletes the file from the write dir when its modification time is older
            than RETENTION_TIME_SECONDS
        """
        fullpath = os.path.join(self.write_dir, fname)
        try:
            age = time.time() - os.path.getmtime(fullpath)
            if age <= RETENTION_TIME_SECONDS:
                return False
            os.remove(fullpath)
        except OSError as err:
            self.logger.error("Could not remove file {}: {}".format(fullpath, err))
            return False

        return True

    def _file_identities_info(self):
        """
        Collects the identities already written to rfidentities_*.json files in the write dir

        Arguments:
            None

        Returns:
            dict: A dictionary keyed by identity name (the "subject" field), whose values are
                file,   -- name of the first file the identity was found in
                hashes, -- hashes of the identity's exposed secrets across all files

        Side Effects:
            Deletes files older than RETENTION_TIME_SECONDS after they have been read
        """
        identities = {}
        for fname in self._list_identity_files():
            # Go through remaining identities to find what shouldn't be indexed again
            try:
                with open(os.path.join(self.write_dir, fname), 'r') as identities_fp:
                    identities_json = json.load(identities_fp)
            except (OSError, ValueError) as err:
                # An unreadable file must not abort the run; it is skipped here and still
                # becomes eligible for the stale-file cleanup below
                self.logger.error("Could not read identities from {}: {}".format(fname, err))
                identities_json = []

            for identity in identities_json:
                name = identity["subject"]
                hashes = self._secret_hashes(identity)
                if name not in identities:
                    identities[name] = {"file": fname, "hashes": hashes}
                else:
                    identities[name]["hashes"] = list(set(identities[name]["hashes"] + hashes))

            # Remove the file only if it is an rfidentities_* file and it is older than RETENTION_TIME
            if self._remove_if_stale(fname):
                self.logger.info("Deleted file {} from disk as age was greater than retention time of {} seconds".format(
                    fname, RETENTION_TIME_SECONDS))

        return identities

    def _write_file(self, identities):
        """
        Writes identities to self.write_dir/rfidentities_[ISO 8601 timestamp].json

        Args:
            identities ([dict]): A JSON array of identity dictionary data

        Returns:
            None

        Side Effects:
            Creates the file in self.write_dir with the identities serialized as JSON.
            Nothing is created when identities is empty.
        """
        # Don't create an output file unless there are identities to write
        if not identities:
            self.logger.info(
                "No new identities to write, skipping writing to output file")
            return

        # ':' is replaced because it is not a valid file name character on every platform
        formatted_time = datetime.datetime.now().isoformat().replace(':', '_')
        filename = 'rfidentities_' + formatted_time + '.json'

        self.logger.info("Writing {} credentials to {}".format(
            str(len(identities)), filename))

        pathname = os.path.join(self.write_dir, filename)
        try:
            with open(pathname, 'w') as outfile:
                outfile.write(json.dumps(identities))

            self.logger.info("Wrote {} credentials to {}".format(
                str(len(identities)), filename))
        except Exception as e:
            self.logger.error("Could not write to file: {}".format(e))

    def collect_identities(self):
        """
        Collects and writes identity data to files

        Searches the API for identities, looks up their credentials in batches and writes
        every credential that is not already on disk. A credential counts as already on
        disk when its subject is known and it shares at least one hash with that subject.

        Arguments:
            None

        Returns:
            None

        Side Effects:
            Produces file rfidentities_[timestamp].json with the new credentials
        """
        identities_on_disk = self._file_identities_info()
        identity_list_on_api = self.client.make_search_request(self.domains, self.lookback_days, self.domain_types,
                                                               self.password_properties, self.exfiltration_days,
                                                               self.breach_name, self.breach_lbd, self.dump_name, self.dump_lbd,
                                                               self.username_properties, self.authorization_protocols,
                                                               self.authorization_technologies)

        self.logger.info("Found {} identities on API within lookback days timeframe".format(
            len(identity_list_on_api)))

        # TODO: Perform multithreading on the lookup process
        self.logger.info("Calling {} to find credentials for {} identities".format(
            self.client.URL_CREDENTIALS_LOOKUP, len(identity_list_on_api)))
        chunked_identities_to_search = [
            identity_list_on_api[i:i + self.LOOKUP_BATCH_SIZE]
            for i in range(0, len(identity_list_on_api), self.LOOKUP_BATCH_SIZE)]
        cred_lookup = []
        for batch_num, chunked_identity_list in enumerate(chunked_identities_to_search):
            # An empty result is treated as a failed attempt and retried
            for _attempt in range(self.MAX_LOOKUP_ATTEMPTS):
                cred_lookup_batch = self.client.make_lookup_request(batch_num, chunked_identity_list, self.lookback_days,
                                                                    self.password_properties, self.breach_name, self.breach_lbd,
                                                                    self.dump_name, self.dump_lbd, self.username_properties,
                                                                    self.authorization_protocols, self.authorization_technologies)
                if cred_lookup_batch:
                    cred_lookup += cred_lookup_batch
                    break
            else:
                self.logger.error("Maximum attempts ({}) for batch lookup {} reached. Will not retry this batch".format(
                    self.MAX_LOOKUP_ATTEMPTS, batch_num))

            self.logger.info("Collected credentials count is now: {}".format(len(cred_lookup)))

        self.logger.info(
            "Retrieved credential information for {} identities".format(len(cred_lookup)))

        # Check API identities against on-disk identities
        # Write to disk if API identity is not on disk already
        creds_to_write = []
        creds_on_disk = 0
        for identity in cred_lookup:
            for cred in identity["credentials"]:
                known = identities_on_disk.get(cred["subject"])
                if known is None or set(known["hashes"]).isdisjoint(self._secret_hashes(cred)):
                    creds_to_write.append(cred)
                else:
                    creds_on_disk += 1

        self.logger.info(
            "{} duplicate credentials were already found on disk".format(creds_on_disk))

        self._write_file(creds_to_write)


def create_ssl_ctx():
    """Build the SSL context used when SSL verification is switched off

    Returns:
        An ssl.SSLContext that checks neither the host name nor the certificate
    """
    unverified_ctx = ssl.create_default_context()
    # Host name checking is switched off first, then certificate validation
    unverified_ctx.check_hostname = False
    unverified_ctx.verify_mode = ssl.CERT_NONE
    return unverified_ctx


def get_rf_apikey(logger, addr, port):
    """Gets RF API key from Splunk instance using Storage Passwords facility

    The Splunk authorization info is read from stdin as whitespace-separated
    "name:value" entries; the "sessionKey" entry is used to log in.

    Arguments:
        logger -- a logging object for emitting log records
        addr   -- the IP or FQDN of the Splunk instance with the RF API key
        port   -- the port number of the Splunk instance with the RF API key

    Returns:
        The value of "api_key" stored in the
        'TA-recordedfuture:encrypted_passwords:' storage password entry

    Raises:
        KeyError -- Raised when stdin does not provide a sessionKey entry
        splunklib.client.AuthenticationError -- Raised when the Splunk login fails
        PasswordNotFoundError -- Raised when the Splunk storage passwords
                                 service does not provide an RF API key
    """
    logger.info('Obtaining Splunk authorization headers via stdin')
    headers = sys.stdin.read() # Splunk docs call auth info from stdin "headers"
    headers_dict = {}
    for line in headers.split():
        # Split on the first ':' only, so values that contain ':' are kept whole
        name, separator, value = line.partition(':')
        if not separator:
            logger.error('Ignoring malformed authorization entry read from stdin (no ":" found)')
            continue
        headers_dict[name] = value

    try:
        session_key = headers_dict['sessionKey']
    except KeyError:
        logger.error('No sessionKey entry found in the authorization info read from stdin')
        raise

    try:
        service = splunklib.client.connect(host=addr, port=port,
                                           app='TA-recordedfuture',
                                           token=session_key)
    except splunklib.client.AuthenticationError:
        # The session key itself is deliberately not logged: it is a credential
        logger.error('Splunk instance login failed')
        logger.error('Recorded Future API key could not be obtained')
        raise

    apikey = None
    found = False
    for key in service.storage_passwords:
        if key.name != 'TA-recordedfuture:encrypted_passwords:':
            continue
        try:
            pw_info = json.loads(key.content['clear_password'])
        except ValueError as err:  # json.JSONDecodeError is a ValueError
            logger.error('Error decoding storage passwords JSON: %s', err)
            continue
        if isinstance(pw_info, dict) and 'api_key' in pw_info:
            apikey = pw_info['api_key']
            found = True

    if not found:
        raise PasswordNotFoundError

    logger.info('API key retrieved from Splunk Storage Passwords service')

    return apikey


def set_logs(level, write_dir):
    """Sets the log level and output

    Records are written to a rotating 'recordedfuture_identitydetails.log' file in
    write_dir; if that file cannot be opened they go to a stream handler instead.

    Arguments:
        level     -- Logging level of either: 'info', 'debug', 'warn', 'error', 'critical'
        write_dir -- Directory to store log files

    Returns:
        A logging object for this module with the handler attached
    """
    log_level = getattr(logging, level.upper())
    log_filename = os.path.join(
        write_dir, 'recordedfuture_identitydetails.log')
    try:
        logging.basicConfig(level=log_level,
                            filename=log_filename,
                            filemode='a')
        handler = logging.handlers.RotatingFileHandler(log_filename,
                                                       maxBytes=2000000,
                                                       backupCount=5)
    except IOError:
        logging.basicConfig(level=log_level)
        handler = logging.StreamHandler()

    formatter = logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
                                  datefmt='%H:%M:%S')
    handler.setFormatter(formatter)

    logger = logging.getLogger(__name__)
    # Set the level on this logger explicitly: basicConfig() does nothing when the root
    # logger already has handlers, in which case the requested level would be ignored
    logger.setLevel(log_level)
    logger.addHandler(handler)
    logger.propagate = False

    now = datetime.datetime.now()
    formatted_time = now.strftime('%d %B %I:%M%p')
    logger.info('Logger initialized {}'.format(formatted_time))

    return logger


def str2bool(v):
    """
    Determines whether the given string value should be converted to True or False.

    Arguments:
        The string value to check (case-insensitive); a bool is returned unchanged

    Returns:
        Boolean value of True or False

    Raises:
        argparse.ArgumentTypeError -- Raised when the value is not a recognized boolean word
    """
    if isinstance(v, bool):
        return v

    normalized = v.lower()
    if normalized in ('yes', 'true', 't', 'y', '1'):
        return True
    if normalized in ('no', 'false', 'f', 'n', '0'):
        return False

    # Single-line literal: a backslash continuation inside the string would embed the
    # source indentation in the message shown to the user
    raise argparse.ArgumentTypeError('Boolean value [yes/no, true/false, 0/1] expected.')


def parse_args(argv=None):
    """
    Parse command-line arguments

    Arguments:
        argv -- optional list of argument strings to parse; when None (the default)
                the arguments are taken from sys.argv

    Returns:
        A Namespace object containing argument data
    """

    parser = argparse.ArgumentParser(
        description='Retrieves exposed credentials for the specified domains and timeframe.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '-t', '--token',
        type=str,
        help='Recorded Future API token'
    )
    parser.add_argument(
        '-ld', '--lookback_days',
        type=int,
        required=True,
        help='Only include breaches from this amount of days onwards (required)'
    )
    parser.add_argument(
        '-d', '--domains',
        type=str,
        required=True,
        nargs='+',
        help='Domains you want to search exposed credentials for (required)'
    )
    parser.add_argument(
        '-dt', '--domain_types',
        type=str,
        default=None,
        nargs='?',
        choices=['Authorization', 'Email'],
        # The default is appended by ArgumentDefaultsHelpFormatter, so it is not repeated here
        help='Email will pull credentials with the domain in the email, whereas '
             'Authorization will pull credentials with the domain in the authorization string'
    )
    parser.add_argument(
        '-pp', '--password_properties',
        type=str,
        default=[],
        nargs='*',
        choices=['Letter', 'Number', 'Symbol', 'UpperCase', 'LowerCase', 'MixedCase',
                 'AtLeast8Characters', 'AtLeast12Characters', 'AtLeast16Characters', 'AtLeast24Characters'],
        help='Only include breaches of passwords that exhibit these properties'
    )
    parser.add_argument(
        '-ed', '--exfiltration_days',
        type=int,
        default=None,
        nargs='?',
        help='Only include exfiltrations from this amount of days onwards'
    )
    parser.add_argument(
        '-bn', '--breach_name',
        type=str,
        default=None,
        nargs='?',
        help='Include exposed credentials from this breach only'
    )
    parser.add_argument(
        '-bld', '--breach_lookback_days',
        type=int,
        default=None,
        nargs='?',
        help='Number of days since the specified breach'
    )
    parser.add_argument(
        '-dn', '--dump_name',
        type=str,
        default=None,
        nargs='?',
        help='Include exposed credentials from this dump only'
    )
    parser.add_argument(
        '-dld', '--dump_lookback_days',
        type=int,
        default=None,
        nargs='?',
        help='Number of days since the specified dump'
    )
    parser.add_argument(
        '-un', '--username_properties',
        type=str,
        nargs='*',
        help='Only include credentials with these username properties'
    )
    parser.add_argument(
        '-at', '--authorization_technologies',
        type=str,
        nargs='*',
        help='Only include credentials with these authorization technologies'
    )
    parser.add_argument(
        '-ap', '--authorization_protocols',
        type=str,
        nargs='*',
        help='Only include credentials with these authorization protocols'
    )
    parser.add_argument(
        '-l', '--log',
        type=str,
        # const keeps a bare "-l" (no value) at the default level instead of None
        const='info',
        default='info',
        nargs='?',
        choices=['info', 'debug', 'warn', 'error', 'critical'],
        help='The level of logging: info, debug, warn, error, critical'
    )
    parser.add_argument(
        '--dir',
        type=str,
        const=os.path.join('..', 'local'),
        default=os.path.join('..', 'local'),
        nargs='?',
        help='Directory to write results files to'
    )
    parser.add_argument(
        '-a', '--addr',
        type=str,
        help='Address of the Splunk host storage passwords endpoint',
        default='localhost',
        dest='addr'
    )
    parser.add_argument(
        '-p', '--port',
        type=int,
        help='Splunk instance port to connect to',
        default=8089,
        dest='port'
    )
    parser.add_argument(
        '--no_verify',
        type=str2bool,
        # A bare "--no_verify" means True; an explicit value is parsed by str2bool
        const=True,
        default=False,
        nargs='?',
        help='Disable SSL certificate checking (not recommended)'
    )

    return parser.parse_args(argv)


def main():
    """
    Script entry point: parses the arguments, sets up logging, obtains the API key
    (from the command line, otherwise from Splunk storage passwords) and collects
    the identities.

    Exits with status 1 when no API key can be obtained.
    """
    args = parse_args()
    logger = set_logs(args.log, args.dir)

    try:
        apikey = (args.token if args.token is not None
                  else get_rf_apikey(logger, args.addr, args.port))
    except PasswordNotFoundError:
        logger.error('RF API key could not be read from Storage Passwords or command line arguments')
        logger.error('Exiting script now.')
        # Non-zero status: the run failed, and sys.exit does not depend on the site module
        sys.exit(1)

    # IdentitiesManager takes "verify", which is the inverse of the --no_verify flag
    identities = IdentitiesManager(apikey, args.domains, args.lookback_days, args.domain_types,
                                   args.password_properties, args.exfiltration_days, args.breach_name,
                                   args.breach_lookback_days, args.dump_name, args.dump_lookback_days,
                                   args.username_properties, args.authorization_protocols,
                                   args.authorization_technologies, args.dir, not args.no_verify, logger)
    identities.collect_identities()


# Run the collection only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()
