diff --git a/docs/source/api/msticpy.auth.cloud_mappings_offline.rst b/docs/source/api/msticpy.auth.cloud_mappings_offline.rst new file mode 100644 index 000000000..30305a2b3 --- /dev/null +++ b/docs/source/api/msticpy.auth.cloud_mappings_offline.rst @@ -0,0 +1,7 @@ +msticpy.auth.cloud\_mappings\_offline module +============================================ + +.. automodule:: msticpy.auth.cloud_mappings_offline + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/msticpy.auth.rst b/docs/source/api/msticpy.auth.rst index c24817a16..b129050f6 100644 --- a/docs/source/api/msticpy.auth.rst +++ b/docs/source/api/msticpy.auth.rst @@ -15,6 +15,7 @@ Submodules msticpy.auth.azure_auth msticpy.auth.azure_auth_core msticpy.auth.cloud_mappings + msticpy.auth.cloud_mappings_offline msticpy.auth.cred_wrapper msticpy.auth.keyring_client msticpy.auth.keyvault_client diff --git a/docs/source/data_acquisition/DataQueries.rst b/docs/source/data_acquisition/DataQueries.rst index 58755b5d4..0d43cd686 100644 --- a/docs/source/data_acquisition/DataQueries.rst +++ b/docs/source/data_acquisition/DataQueries.rst @@ -20,7 +20,7 @@ Azure list_azure_activity_for_ip Returns Azure Activity for Azure list_azure_activity_for_resource Returns Azure Activity for an Azure Resource ID end (datetime), resource_id (str), start (datetime) AzureActivity Azure list_storage_ops_for_hash Returns Azure Storage Operations for an MD5 file hash end (datetime), file_hash (str), start (datetime) StorageFileLogs Azure list_storage_ops_for_ip Returns Storage Operations for an IP Address end (datetime), ip_address (str), start (datetime) StorageFileLogs -AzureNetwork all_network_connections_csl no description end (datetime), start (datetime) CommonSecurityLog +AzureNetwork all_network_connections_csl Returns all network connections for a time range (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog AzureNetwork az_net_analytics Returns all Azure Network Flow (NSG) Data for a given host end (datetime), start (datetime) AzureNetworkAnalytics_CL AzureNetwork dns_lookups_for_domain Returns DNS query events for a specified domain domain (str), end (datetime), start (datetime) DnsEvents AzureNetwork dns_lookups_for_ip Returns Dns query events that contain a resolved IP address end (datetime), ip_address (str), start (datetime) DnsEvents @@ -30,9 +30,9 @@ AzureNetwork get_heartbeat_for_ip Returns latest OMS Heartbe AzureNetwork get_host_for_ip Returns the most recent Azure NSG Interface event for an IP Address. end (datetime), ip_address (str), start (datetime) AzureNetworkAnalytics_CL AzureNetwork get_ips_for_host Returns the most recent Azure Network NSG Interface event for a host. 
end (datetime), host_name (str), start (datetime) AzureNetworkAnalytics_CL AzureNetwork host_network_connections_csl Returns network connections to and from a host (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog -AzureNetwork hosts_by_ip_csl no description end (datetime), start (datetime) CommonSecurityLog -AzureNetwork ip_network_connections_csl no description end (datetime), start (datetime) CommonSecurityLog -AzureNetwork ips_by_host_csl no description end (datetime), start (datetime) CommonSecurityLog +AzureNetwork hosts_by_ip_csl Returns hosts associated with a IP addresses (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog +AzureNetwork ip_network_connections_csl Returns network connections to and from an IP address (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog +AzureNetwork ips_by_host_csl Returns all IP addresses associated with a host (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog AzureNetwork list_azure_network_flows_by_host Returns Azure NSG flow events for a host. end (datetime), host_name (str), start (datetime) AzureNetworkAnalytics_CL AzureNetwork list_azure_network_flows_by_ip Returns Azure NSG flow events for an IP Address. end (datetime), ip_address_list (list), start (datetime) AzureNetworkAnalytics_CL AzureNetwork network_connections_to_url Returns connections to a URL or domain (CommonSecurityLog) end (datetime), start (datetime), url (str) CommonSecurityLog @@ -48,6 +48,9 @@ Heartbeat get_heartbeat_for_host Returns latest OMS Heartbe Heartbeat get_heartbeat_for_ip Returns latest OMS Heartbeat event for ip address. end (datetime), ip_address (str), start (datetime) Heartbeat Heartbeat get_info_by_hostname Deprecated - use 'get_heartbeat_for_host' end (datetime), host_name (str), start (datetime) Heartbeat Heartbeat get_info_by_ipaddress Deprecated - use 'get_heartbeat_for_ip' end (datetime), ip_address (str), start (datetime) Heartbeat +IdentityOnPrem logons_for_account Return all Active Directory on-premises user logons for user name account_name (str), end (datetime), start (datetime) IdentityLogonEvents +IdentityOnPrem logons_for_host Return all Active Directory on-premises user logons for host/device name end (datetime), host_name (str), start (datetime) IdentityLogonEvents +IdentityOnPrem logons_for_ip Return all Active Directory on-premises user logons for ip address end (datetime), ip_address (str), start (datetime) IdentityLogonEvents LinuxAudit auditd_all Extract all audit messages grouped by mssg_id end (datetime), start (datetime) AuditLog_CL LinuxSyslog all_syslog Returns all syslog activity for a host end (datetime), start (datetime) Syslog LinuxSyslog cron_activity Returns all cron activity for a host end (datetime), start (datetime) Syslog @@ -65,19 +68,32 @@ LinuxSyslog summarize_events Returns summarized syslog LinuxSyslog sysmon_process_events Sysmon Process Events on host end (datetime), host_name (str), start (datetime) - LinuxSyslog user_group_activity Returns all user/group additions, deletions, and modifications for a host end (datetime), start (datetime) Syslog LinuxSyslog user_logon User logon events on a host end (datetime), host_name (str), start (datetime) Syslog -M365D host_connections Returns connections by for a specified hostname end (datetime), host_name (str), start (datetime) DeviceNetworkEvents +M365D application_alerts Lists alerts associated with a cloud app or OAuth app app_name (str), end (datetime), start (datetime) AlertInfo +M365D 
host_alerts Lists alerts associated with host/device name end (datetime), host_name (str), start (datetime) AlertInfo +M365D host_connections Returns connections by a specified hostname end (datetime), host_name (str), start (datetime) DeviceNetworkEvents +M365D ip_alerts Lists alerts associated with a specified remote IP end (datetime), ip_address (str), start (datetime) AlertInfo M365D ip_connections Returns network connections associated with a specified remote IP end (datetime), ip_address (str), start (datetime) DeviceNetworkEvents +M365D list_alerts Retrieves list of alerts end (datetime), start (datetime) AlertInfo +M365D list_alerts_with_evidence Retrieves list of alerts with their evidence end (datetime), start (datetime) AlertInfo M365D list_connections Retrieves list of all network connections end (datetime), start (datetime) DeviceNetworkEvents M365D list_file_events_for_filename Lists all file events by filename end (datetime), file_name (str), start (datetime) DeviceFileEvents M365D list_file_events_for_hash Lists all file events by hash end (datetime), file_hash (str), start (datetime) DeviceFileEvents M365D list_file_events_for_host Lists all file events for a host/device end (datetime), start (datetime) DeviceFileEvents M365D list_file_events_for_path Lists all file events from files in a certain path end (datetime), path (str), start (datetime) DeviceFileEvents M365D list_host_processes Return all process creations for a host for the specified time range end (datetime), host_name (str), start (datetime) DeviceProcessEvents +M365D mail_message_alerts Lists alerts associated with a specified mail message end (datetime), message_id (str), start (datetime) AlertInfo +M365D mailbox_alerts Lists alerts associated with a specified mailbox end (datetime), mailbox (str), start (datetime) AlertInfo +M365D process_alerts Lists alerts associated with a specified process end (datetime), file_name (str), start (datetime) AlertInfo M365D process_cmd_line Lists all processes with a command line containing a string (all hosts) cmd_line (str), end (datetime), start (datetime) DeviceProcessEvents M365D process_creations Return all processes with matching name or hash (all hosts) end (datetime), process_identifier (str), start (datetime) DeviceProcessEvents M365D process_paths Return all processes with a matching path (part path) (all hosts) end (datetime), file_path (str), start (datetime) DeviceProcessEvents M365D protocol_connections Returns connections associated with a specified protocol (port number) end (datetime), protocol (str), start (datetime) DeviceNetworkEvents +M365D registry_key_alerts Lists alerts associated with a specified registry key end (datetime), key_name (str), start (datetime) AlertInfo +M365D sha1_alerts Lists alerts associated with a specified SHA1 hash end (datetime), file_hash (str), start (datetime) AlertInfo +M365D sha256_alerts Lists alerts associated with a specified SHA256 hash end (datetime), file_hash (str), start (datetime) AlertInfo +M365D url_alerts Lists alerts associated with a specified URL end (datetime), start (datetime), url (str) AlertInfo M365D url_connections Returns connections associated with a specified URL end (datetime), start (datetime), url (str) DeviceNetworkEvents +M365D user_alerts Lists alerts associated with a specified user account_name (str), end (datetime), start (datetime) AlertInfo M365D user_files Return all files created by a user account_name (str), end (datetime), start (datetime) - M365D user_logons Return all user logons 
for user name account_name (str), end (datetime), start (datetime) - M365D user_network Return all network connections associated with a user account_name (str), end (datetime), start (datetime) - @@ -135,15 +151,15 @@ MultiDataSource get_timeseries_data Generic query to return Ti MultiDataSource get_timeseries_decompose Generic Time Series decomposition using native KQL analysis (series_decompose) end (datetime), start (datetime), table (str) na MultiDataSource plot_timeseries_datawithbaseline Plot of Time Series data using native KQL analysis and plot rendering (KQLMagic only) end (datetime), start (datetime), table (str) na MultiDataSource plot_timeseries_scoreanomolies Plot Time Series anomaly score using native KQL render (KQLMagic only) end (datetime), start (datetime), table (str) na -Network all_network_connections_csl no description end (datetime), start (datetime) CommonSecurityLog +Network all_network_connections_csl Returns all network connections for a time range (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog Network get_heartbeat_for_host Returns latest OMS Heartbeat event for host. end (datetime), host_name (str), start (datetime) Heartbeat Network get_heartbeat_for_ip Returns latest OMS Heartbeat event for ip address. end (datetime), ip_address (str), start (datetime) Heartbeat Network get_host_for_ip Returns the most recent Azure NSG Interface event for an IP Address. end (datetime), ip_address (str), start (datetime) AzureNetworkAnalytics_CL Network get_ips_for_host Returns the most recent Azure Network NSG Interface event for a host. end (datetime), host_name (str), start (datetime) AzureNetworkAnalytics_CL Network host_network_connections_csl Returns network connections to and from a host (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog -Network hosts_by_ip_csl no description end (datetime), start (datetime) CommonSecurityLog -Network ip_network_connections_csl no description end (datetime), start (datetime) CommonSecurityLog -Network ips_by_host_csl no description end (datetime), start (datetime) CommonSecurityLog +Network hosts_by_ip_csl Returns hosts associated with a IP addresses (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog +Network ip_network_connections_csl Returns network connections to and from an IP address (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog +Network ips_by_host_csl Returns all IP addresses associated with a host (CommonSecurityLog) end (datetime), start (datetime) CommonSecurityLog Network list_azure_network_flows_by_host Returns Azure NSG flow events for a host. end (datetime), host_name (str), start (datetime) AzureNetworkAnalytics_CL Network list_azure_network_flows_by_ip Returns Azure NSG flow events for an IP Address. 
end (datetime), ip_address_list (list), start (datetime) AzureNetworkAnalytics_CL Network network_connections_to_url Returns connections to a URL or domain (CommonSecurityLog) end (datetime), start (datetime), url (str) CommonSecurityLog @@ -194,80 +210,83 @@ Queries for Microsoft 365 Defender Data Environment identifier: M365D -============ ============================= ================================================================================================================================== ================================================================== =================== -QueryGroup Query Description Req-Params Table -============ ============================= ================================================================================================================================== ================================================================== =================== -M365D application_alerts Lists alerts associated with a cloud app or OAuth app app_name (str), end (datetime), start (datetime) AlertInfo -M365D host_alerts Lists alerts by for a specified hostname end (datetime), host_name (str), start (datetime) AlertInfo -M365D host_connections Returns connections by for a specified hostname end (datetime), host_name (str), start (datetime) DeviceNetworkEvents -M365D ip_alerts Lists alerts associated with a specified remote IP end (datetime), ip_address (str), start (datetime) AlertInfo -M365D ip_connections Returns network connections associated with a specified remote IP end (datetime), ip_address (str), start (datetime) DeviceNetworkEvents -M365D list_alerts Retrieves list of alerts end (datetime), start (datetime) AlertInfo -M365D list_alerts_with_evidence Retrieves list of alerts with their evidence end (datetime), start (datetime) AlertInfo -M365D list_connections Retrieves list of all network connections end (datetime), start (datetime) DeviceNetworkEvents -M365D list_file_events_for_filename Lists all file events by filename end (datetime), file_name (str), start (datetime) DeviceFileEvents -M365D list_file_events_for_hash Lists all file events by hash end (datetime), file_hash (str), start (datetime) DeviceFileEvents -M365D list_file_events_for_host Lists all file events for a host/device end (datetime), start (datetime) DeviceFileEvents -M365D list_file_events_for_path Lists all file events from files in a certain path end (datetime), path (str), start (datetime) DeviceFileEvents -M365D list_host_processes Return all process creations for a host for the specified time range end (datetime), host_name (str), start (datetime) DeviceProcessEvents -M365D mail_message_alerts Lists alerts associated with a specified mail message end (datetime), message_id (str), start (datetime) AlertInfo -M365D mailbox_alerts Lists alerts associated with a specified mailbox end (datetime), mailbox (str), start (datetime) AlertInfo -M365D process_alerts Lists alerts associated with a specified process end (datetime), file_name (str), start (datetime) AlertInfo -M365D process_cmd_line Lists all processes with a command line containing a string (all hosts) cmd_line (str), end (datetime), start (datetime) DeviceProcessEvents -M365D process_creations Return all processes with matching name or hash (all hosts) end (datetime), process_identifier (str), start (datetime) DeviceProcessEvents -M365D process_paths Return all processes with a matching path (part path) (all hosts) end (datetime), file_path (str), start (datetime) DeviceProcessEvents -M365D protocol_connections Returns 
connections associated with a specified protocol (port number) end (datetime), protocol (str), start (datetime) DeviceNetworkEvents -M365D registry_key_alerts Lists alerts associated with a specified registry key end (datetime), key_name (str), start (datetime) AlertInfo -M365D sha1_alerts Lists alerts associated with a specified SHA1 hash end (datetime), file_hash (str), start (datetime) AlertInfo -M365D sha256_alerts Lists alerts associated with a specified SHA256 hash end (datetime), file_hash (str), start (datetime) AlertInfo -M365D url_alerts Lists alerts associated with a specified URL end (datetime), start (datetime), url (str) AlertInfo -M365D url_connections Returns connections associated with a specified URL end (datetime), start (datetime), url (str) DeviceNetworkEvents -M365D user_alerts Lists alerts associated with a specified user account_name (str), end (datetime), start (datetime) AlertInfo -M365D user_files Return all files created by a user account_name (str), end (datetime), start (datetime) - -M365D user_logons Return all user logons for user name account_name (str), end (datetime), start (datetime) - -M365D user_network Return all network connections associated with a user account_name (str), end (datetime), start (datetime) - -M365D user_processes Return all processes created by a user account_name (str), end (datetime), start (datetime) - -M365DHunting accessibility_persistence This query looks for persistence or privilege escalation done using Windows Accessibility features. end (datetime), start (datetime) - -M365DHunting av_sites Pivot from downloads detected by Windows Defender Antivirus to other files downloaded from the same sites end (datetime), start (datetime) - -M365DHunting b64_pe Finding base64 encoded PE files header seen in the command line parameters end (datetime), start (datetime) - -M365DHunting brute_force Look for public IP addresses that failed to logon to a computer multiple times, using multiple accounts, and eventually succeeded. end (datetime), start (datetime) - -M365DHunting cve_2018_1000006l Looks for CVE-2018-1000006 exploitation end (datetime), start (datetime) - -M365DHunting cve_2018_1111 Looks for CVE-2018-1111 exploitation end (datetime), start (datetime) - -M365DHunting cve_2018_4878 This query checks for specific processes and domain TLD used in the CVE-2018-4878 end (datetime), start (datetime) - -M365DHunting doc_with_link Looks for a Word document attachment, from which a link was clicked, and after which there was a browser download. end (datetime), start (datetime) - -M365DHunting dropbox_link Looks for user content downloads from dropbox that originate from a link/redirect from a 3rd party site. end (datetime), start (datetime) - -M365DHunting email_link Look for links opened from mail apps – if a detection occurred right afterwards end (datetime), start (datetime) - -M365DHunting email_smartscreen Look for links opened from outlook.exe, followed by a browser download and then a SmartScreen app warning end (datetime), start (datetime) - -M365DHunting malware_recycle Finding attackers hiding malware in the recycle bin. end (datetime), start (datetime) - -M365DHunting network_scans Looking for high volume queries against a given RemoteIP, per ComputerName, RemotePort and Process end (datetime), start (datetime) - -M365DHunting powershell_downloads Finds PowerShell execution events that could involve a download. 
end (datetime), start (datetime) - -M365DHunting service_account_powershell Service Accounts Performing Remote PowerShell end (datetime), start (datetime) - -M365DHunting smartscreen_ignored Query for SmartScreen URL blocks, where the user has decided to run the malware nontheless. end (datetime), start (datetime) - -M365DHunting smb_discovery Query for processes that accessed more than 10 IP addresses over port 445 (SMB) - possibly scanning for network shares. end (datetime), start (datetime) - -M365DHunting tor Looks for Tor client, or for a common Tor plugin called Meek. end (datetime), start (datetime) - -M365DHunting uncommon_powershell Find which uncommon Powershell Cmdlets were executed on that machine in a certain time period. end (datetime), host_name (str), start (datetime), timestamp (str) - -M365DHunting user_enumeration The query finds attempts to list users or groups using Net commands end (datetime), start (datetime) - -MDEHunting accessibility_persistence This query looks for persistence or privilege escalation done using Windows Accessibility features. end (datetime), start (datetime) - -MDEHunting av_sites Pivot from downloads detected by Windows Defender Antivirus to other files downloaded from the same sites end (datetime), start (datetime) - -MDEHunting b64_pe Finding base64 encoded PE files header seen in the command line parameters end (datetime), start (datetime) - -MDEHunting brute_force Look for public IP addresses that failed to logon to a computer multiple times, using multiple accounts, and eventually succeeded. end (datetime), start (datetime) - -MDEHunting cve_2018_1000006l Looks for CVE-2018-1000006 exploitation end (datetime), start (datetime) - -MDEHunting cve_2018_1111 Looks for CVE-2018-1111 exploitation end (datetime), start (datetime) - -MDEHunting cve_2018_4878 This query checks for specific processes and domain TLD used in the CVE-2018-4878 end (datetime), start (datetime) - -MDEHunting doc_with_link Looks for a Word document attachment, from which a link was clicked, and after which there was a browser download. end (datetime), start (datetime) - -MDEHunting dropbox_link Looks for user content downloads from dropbox that originate from a link/redirect from a 3rd party site. end (datetime), start (datetime) - -MDEHunting email_link Look for links opened from mail apps – if a detection occurred right afterwards end (datetime), start (datetime) - -MDEHunting email_smartscreen Look for links opened from outlook.exe, followed by a browser download and then a SmartScreen app warning end (datetime), start (datetime) - -MDEHunting malware_recycle Finding attackers hiding malware in the recycle bin. end (datetime), start (datetime) - -MDEHunting network_scans Looking for high volume queries against a given RemoteIP, per ComputerName, RemotePort and Process end (datetime), start (datetime) - -MDEHunting powershell_downloads Finds PowerShell execution events that could involve a download. end (datetime), start (datetime) - -MDEHunting service_account_powershell Service Accounts Performing Remote PowerShell end (datetime), start (datetime) - -MDEHunting smartscreen_ignored Query for SmartScreen URL blocks, where the user has decided to run the malware nontheless. end (datetime), start (datetime) - -MDEHunting smb_discovery Query for processes that accessed more than 10 IP addresses over port 445 (SMB) - possibly scanning for network shares. end (datetime), start (datetime) - -MDEHunting tor Looks for Tor client, or for a common Tor plugin called Meek. 
end (datetime), start (datetime) - -MDEHunting uncommon_powershell Find which uncommon Powershell Cmdlets were executed on that machine in a certain time period. end (datetime), host_name (str), start (datetime), timestamp (str) - -MDEHunting user_enumeration The query finds attempts to list users or groups using Net commands end (datetime), start (datetime) - -============ ============================= ================================================================================================================================== ================================================================== =================== +============== ============================= ================================================================================================================================== ================================================================== =================== +QueryGroup Query Description Req-Params Table +============== ============================= ================================================================================================================================== ================================================================== =================== +IdentityOnPrem logons_for_account Return all Active Directory on-premises user logons for user name account_name (str), end (datetime), start (datetime) IdentityLogonEvents +IdentityOnPrem logons_for_host Return all Active Directory on-premises user logons for host/device name end (datetime), host_name (str), start (datetime) IdentityLogonEvents +IdentityOnPrem logons_for_ip Return all Active Directory on-premises user logons for ip address end (datetime), ip_address (str), start (datetime) IdentityLogonEvents +M365D application_alerts Lists alerts associated with a cloud app or OAuth app app_name (str), end (datetime), start (datetime) AlertInfo +M365D host_alerts Lists alerts associated with host/device name end (datetime), host_name (str), start (datetime) AlertInfo +M365D host_connections Returns connections by a specified hostname end (datetime), host_name (str), start (datetime) DeviceNetworkEvents +M365D ip_alerts Lists alerts associated with a specified remote IP end (datetime), ip_address (str), start (datetime) AlertInfo +M365D ip_connections Returns network connections associated with a specified remote IP end (datetime), ip_address (str), start (datetime) DeviceNetworkEvents +M365D list_alerts Retrieves list of alerts end (datetime), start (datetime) AlertInfo +M365D list_alerts_with_evidence Retrieves list of alerts with their evidence end (datetime), start (datetime) AlertInfo +M365D list_connections Retrieves list of all network connections end (datetime), start (datetime) DeviceNetworkEvents +M365D list_file_events_for_filename Lists all file events by filename end (datetime), file_name (str), start (datetime) DeviceFileEvents +M365D list_file_events_for_hash Lists all file events by hash end (datetime), file_hash (str), start (datetime) DeviceFileEvents +M365D list_file_events_for_host Lists all file events for a host/device end (datetime), start (datetime) DeviceFileEvents +M365D list_file_events_for_path Lists all file events from files in a certain path end (datetime), path (str), start (datetime) DeviceFileEvents +M365D list_host_processes Return all process creations for a host for the specified time range end (datetime), host_name (str), start (datetime) DeviceProcessEvents +M365D mail_message_alerts Lists alerts associated with a specified mail message end (datetime), message_id (str), 
start (datetime) AlertInfo +M365D mailbox_alerts Lists alerts associated with a specified mailbox end (datetime), mailbox (str), start (datetime) AlertInfo +M365D process_alerts Lists alerts associated with a specified process end (datetime), file_name (str), start (datetime) AlertInfo +M365D process_cmd_line Lists all processes with a command line containing a string (all hosts) cmd_line (str), end (datetime), start (datetime) DeviceProcessEvents +M365D process_creations Return all processes with matching name or hash (all hosts) end (datetime), process_identifier (str), start (datetime) DeviceProcessEvents +M365D process_paths Return all processes with a matching path (part path) (all hosts) end (datetime), file_path (str), start (datetime) DeviceProcessEvents +M365D protocol_connections Returns connections associated with a specified protocol (port number) end (datetime), protocol (str), start (datetime) DeviceNetworkEvents +M365D registry_key_alerts Lists alerts associated with a specified registry key end (datetime), key_name (str), start (datetime) AlertInfo +M365D sha1_alerts Lists alerts associated with a specified SHA1 hash end (datetime), file_hash (str), start (datetime) AlertInfo +M365D sha256_alerts Lists alerts associated with a specified SHA256 hash end (datetime), file_hash (str), start (datetime) AlertInfo +M365D url_alerts Lists alerts associated with a specified URL end (datetime), start (datetime), url (str) AlertInfo +M365D url_connections Returns connections associated with a specified URL end (datetime), start (datetime), url (str) DeviceNetworkEvents +M365D user_alerts Lists alerts associated with a specified user account_name (str), end (datetime), start (datetime) AlertInfo +M365D user_files Return all files created by a user account_name (str), end (datetime), start (datetime) - +M365D user_logons Return all user logons for user name account_name (str), end (datetime), start (datetime) - +M365D user_network Return all network connections associated with a user account_name (str), end (datetime), start (datetime) - +M365D user_processes Return all processes created by a user account_name (str), end (datetime), start (datetime) - +M365DHunting accessibility_persistence This query looks for persistence or privilege escalation done using Windows Accessibility features. end (datetime), start (datetime) - +M365DHunting av_sites Pivot from downloads detected by Windows Defender Antivirus to other files downloaded from the same sites end (datetime), start (datetime) - +M365DHunting b64_pe Finding base64 encoded PE files header seen in the command line parameters end (datetime), start (datetime) - +M365DHunting brute_force Look for public IP addresses that failed to logon to a computer multiple times, using multiple accounts, and eventually succeeded. end (datetime), start (datetime) - +M365DHunting cve_2018_1000006l Looks for CVE-2018-1000006 exploitation end (datetime), start (datetime) - +M365DHunting cve_2018_1111 Looks for CVE-2018-1111 exploitation end (datetime), start (datetime) - +M365DHunting cve_2018_4878 This query checks for specific processes and domain TLD used in the CVE-2018-4878 end (datetime), start (datetime) - +M365DHunting doc_with_link Looks for a Word document attachment, from which a link was clicked, and after which there was a browser download. end (datetime), start (datetime) - +M365DHunting dropbox_link Looks for user content downloads from dropbox that originate from a link/redirect from a 3rd party site. 
end (datetime), start (datetime) - +M365DHunting email_link Look for links opened from mail apps – if a detection occurred right afterwards end (datetime), start (datetime) - +M365DHunting email_smartscreen Look for links opened from outlook.exe, followed by a browser download and then a SmartScreen app warning end (datetime), start (datetime) - +M365DHunting malware_recycle Finding attackers hiding malware in the recycle bin. end (datetime), start (datetime) - +M365DHunting network_scans Looking for high volume queries against a given RemoteIP, per ComputerName, RemotePort and Process end (datetime), start (datetime) - +M365DHunting powershell_downloads Finds PowerShell execution events that could involve a download. end (datetime), start (datetime) - +M365DHunting service_account_powershell Service Accounts Performing Remote PowerShell end (datetime), start (datetime) - +M365DHunting smartscreen_ignored Query for SmartScreen URL blocks, where the user has decided to run the malware nontheless. end (datetime), start (datetime) - +M365DHunting smb_discovery Query for processes that accessed more than 10 IP addresses over port 445 (SMB) - possibly scanning for network shares. end (datetime), start (datetime) - +M365DHunting tor Looks for Tor client, or for a common Tor plugin called Meek. end (datetime), start (datetime) - +M365DHunting uncommon_powershell Find which uncommon Powershell Cmdlets were executed on that machine in a certain time period. end (datetime), host_name (str), start (datetime), timestamp (str) - +M365DHunting user_enumeration The query finds attempts to list users or groups using Net commands end (datetime), start (datetime) - +MDEHunting accessibility_persistence This query looks for persistence or privilege escalation done using Windows Accessibility features. end (datetime), start (datetime) - +MDEHunting av_sites Pivot from downloads detected by Windows Defender Antivirus to other files downloaded from the same sites end (datetime), start (datetime) - +MDEHunting b64_pe Finding base64 encoded PE files header seen in the command line parameters end (datetime), start (datetime) - +MDEHunting brute_force Look for public IP addresses that failed to logon to a computer multiple times, using multiple accounts, and eventually succeeded. end (datetime), start (datetime) - +MDEHunting cve_2018_1000006l Looks for CVE-2018-1000006 exploitation end (datetime), start (datetime) - +MDEHunting cve_2018_1111 Looks for CVE-2018-1111 exploitation end (datetime), start (datetime) - +MDEHunting cve_2018_4878 This query checks for specific processes and domain TLD used in the CVE-2018-4878 end (datetime), start (datetime) - +MDEHunting doc_with_link Looks for a Word document attachment, from which a link was clicked, and after which there was a browser download. end (datetime), start (datetime) - +MDEHunting dropbox_link Looks for user content downloads from dropbox that originate from a link/redirect from a 3rd party site. end (datetime), start (datetime) - +MDEHunting email_link Look for links opened from mail apps – if a detection occurred right afterwards end (datetime), start (datetime) - +MDEHunting email_smartscreen Look for links opened from outlook.exe, followed by a browser download and then a SmartScreen app warning end (datetime), start (datetime) - +MDEHunting malware_recycle Finding attackers hiding malware in the recycle bin. 
end (datetime), start (datetime) - +MDEHunting network_scans Looking for high volume queries against a given RemoteIP, per ComputerName, RemotePort and Process end (datetime), start (datetime) - +MDEHunting powershell_downloads Finds PowerShell execution events that could involve a download. end (datetime), start (datetime) - +MDEHunting service_account_powershell Service Accounts Performing Remote PowerShell end (datetime), start (datetime) - +MDEHunting smartscreen_ignored Query for SmartScreen URL blocks, where the user has decided to run the malware nontheless. end (datetime), start (datetime) - +MDEHunting smb_discovery Query for processes that accessed more than 10 IP addresses over port 445 (SMB) - possibly scanning for network shares. end (datetime), start (datetime) - +MDEHunting tor Looks for Tor client, or for a common Tor plugin called Meek. end (datetime), start (datetime) - +MDEHunting uncommon_powershell Find which uncommon Powershell Cmdlets were executed on that machine in a certain time period. end (datetime), host_name (str), start (datetime), timestamp (str) - +MDEHunting user_enumeration The query finds attempts to list users or groups using Net commands end (datetime), start (datetime) - +============== ============================= ================================================================================================================================== ================================================================== =================== diff --git a/docs/source/data_acquisition/SplunkProvider.rst b/docs/source/data_acquisition/SplunkProvider.rst index 39819663b..7f439dc75 100644 --- a/docs/source/data_acquisition/SplunkProvider.rst +++ b/docs/source/data_acquisition/SplunkProvider.rst @@ -38,7 +38,7 @@ The settings in the file should look like the following: Splunk: Args: host: splunk_host - port: 8089 + port: '8089' username: splunk_user password: [PLACEHOLDER] @@ -54,7 +54,7 @@ to a Key Vault secret using the MSTICPy configuration editor. Splunk: Args: host: splunk_host - port: 8089 + port: '8089' username: splunk_user password: KeyVault: @@ -67,8 +67,13 @@ Parameter Description host (string) The host name (the default is 'localhost'). username (string) The Splunk account username, which is used to authenticate the Splunk instance. password (string) The password for the Splunk account. +splunkToken (string) The Authorization Bearer Token created in the Splunk. =========== =========================================================================================================================== +The username and password are needed for user account authentication. +On the other hand, splunkToken is needed for Token authentication. +The user auth method has a priority to token auth method if both username and splunkToken are set. + Optional configuration parameters: @@ -106,11 +111,11 @@ in msticpy config file. For more information on how to create new user with appropriate roles and permissions, follow the Splunk documents: -`Securing the Spunk platform `__ +`Securing the Spunk platform `__ and -`About users and roles `__. +`About users and roles `__ The user should have permission to at least run its own searches or more depending upon the actions to be performed by user. 
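For illustration, here is a minimal sketch of how the new ``splunkToken`` setting described above might be used to connect from a notebook. This is a hedged example, not part of the patch: the host name and token value are placeholders, and the commented-out query is only an assumed example of the provider's built-in Splunk queries documented later in this file.

.. code:: ipython3

    import msticpy as mp

    qry_prov = mp.QueryProvider("Splunk")

    # Token (bearer) authentication - no username/password required.
    # Note: if both username and splunkToken are configured, the
    # user/password method takes priority over the token.
    qry_prov.connect(host="splunk.contoso.com", splunkToken="<your-bearer-token>")

    # Once connected, run queries as usual, e.g. (hypothetical example):
    # df = qry_prov.SplunkGeneral.get_events_parameterized(index="botsv2")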
@@ -120,10 +125,20 @@ require the following details to specify while connecting: - host = "localhost" (Splunk server FQDN hostname to connect, for locally installed splunk, you can specify localhost) -- port = 8089 (Splunk REST API ) +- port = "8089" (Splunk REST API) - username = "admin" (username to connect to Splunk instance) - password = "yourpassword" (password of the user specified in username) +On the other hand, you can use the authentification token to connect. + +`Create authentication token `__ + +- host = "localhost" (Splunk server FQDN hostname to connect, for locally + installed splunk, you can specify localhost) +- port = "8089" (Splunk REST API) +- splunkToken = "" (token can be used instead of username/password) + + Once you have details, you can specify it in ``msticpyconfig.yaml`` as described earlier. @@ -146,6 +161,11 @@ as parameters to connect. qry_prov.connect(host=, username=, password=) +OR + +.. code:: ipython3 + + qry_prov.connect(host=, splunkToken=) Listing available queries @@ -217,7 +237,7 @@ For more information, see (default value is: | head 100) end: datetime (optional) Query end time - (default value is: 08/26/2017:00:00:00) + (default value is: current time + 1 day) index: str (optional) Splunk index name (default value is: \*) @@ -229,7 +249,7 @@ For more information, see (default value is: \*) start: datetime (optional) Query start time - (default value is: 08/25/2017:00:00:00) + (default value is: current time - 1 day) timeformat: str (optional) Datetime format to use in Splunk query (default value is: "%Y-%m-%d %H:%M:%S.%6N") diff --git a/docs/source/getting_started/Installing.rst b/docs/source/getting_started/Installing.rst index cbbe4a094..c87478afa 100644 --- a/docs/source/getting_started/Installing.rst +++ b/docs/source/getting_started/Installing.rst @@ -51,7 +51,7 @@ created and activated in the prompt. Installation ------------ -Run the following command to install the base configuation of *MSTICPy*. +Run the following command to install the base configuration of *MSTICPy*. ``pip install msticpy`` @@ -71,7 +71,7 @@ known as extras. The syntax for this is: As of version 0.9.0 *MSTICPy* has its dependencies split into extras. This allows you to install only the packages that you -need and avoid the overhead of time and diskspace of dependencies +need and avoid the overhead of time and disk space of dependencies that you do not need. .. note:: extras do not affect the which code from *MSTICPy* is @@ -191,7 +191,7 @@ exception message: .. code:: bash - pip install msticpy[ml] + python -m pip install msticpy[ml] .. note:: In some cases you many not get an informative error. We've tried to trap all of the cases but if @@ -237,3 +237,148 @@ se, and choose the conda file saved earlier with the Spark session configuration - numpy - pip: - msticpy[azure]>=2.3.1 + +Installing for isolated or semi-isolated environments +----------------------------------------------------- + +There are instances in which you may want to use msticpy in an isolated +or semi-isolated environment (e.g. an environment that does not have internet +PyPI access to install packages and dependencies). + +To do this you need to build a transportable archive of MSTICPy and its +dependencies and use that as the source to install from in your target environment. + +We have included a set of scripts to simplify some of this process. These +are available in the `tools folder `__ +of the MSTICPy repo. 
+ +- ``build_wheel_from_targz.py`` - builds wheel files from source tar.gz files +- ``install_all_whl_files.py`` - installs all .whl files in a directory to the target environment +- ``download_python_package.py`` - downloads a python package and its dependencies to a directory. + This script uses docker to perform the download and allows you to build an install + package for a Linux environment from a Windows environment. + +In the instructions below we give both the manual steps and the script-based steps. +The latter are recommended to avoid repetitious typing and to avoid missing files. + +For either manual or script-based installation it is **essential** that you +use the same Python version for both source and target environments, since +pip will download the correct version of the package for the Python version +that it is executing in. We recommend creating a virtual Python or Conda +environment for this purpose (this isn't required for the docker-based +script). + +In order to find the correct python version, you can run the following: + +.. code-block:: powershell + + python --version + + +Windows Source to Isolated Windows Environment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. On your primary Windows machine with internet access create a virtual environment + for the python version you want to use in the target environment. +2. Download msticpy by running the following: + +.. code-block:: powershell + + python -m pip download msticpy --dest \path\to\destination + +Within ``\path\to\destination`` you should see a .whl file for msticpy and the other dependencies. +Some dependencies may not be .whl files, but tar.gz files. +These files will need to be built into .whl files. To do this, run the following +for each tar.gz file: + +.. code-block:: powershell + + python -m pip wheel {file.tar.gz} -w \path\to\destination + +or use the script from MSTICPy repo "tools" folder to process all files +`build_wheel_from_targz.py `__ +to build all the tar.gz files in a directory. + +3. Zip and copy the directory folder to your target environment. + +4. From the Isolated environment, unzip if needed and then run the following for each .whl file: + +.. code-block:: powershell + + python -m pip install "\path\to\destination\{whl_file.whl}" + +.. note:: If you have an issue installing any of the packages you can use the script from + the MSTICPy repo "tools" folder `install_all_whl_files.py `__ + to help. + +5. Test the installation by running msticpy that suits your needs. + + +Linux Source to Isolated Linux Environment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Follow the *Windows Source to Isolated Windows Environment* instructions above. + + +Windows Source to Isolated Linux Environment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. On your source Windows machine with internet access, download + `Docker for Windows `__. + We are using docker to ensure that the wheels that we are downloading are meant for the Linux architecture. + +2. Run the `download_python_package.py + `__ script. + +Example: +.. code-block:: powershell + + python \path\to\python\file --python-version "3.8.5" --module-name "msticpy[sentinel]" --module-version "2.7.0" --directory \path\to\destination + +3. Zip and copy the directory folder to the isolated environment. + +4. From the isolated environment, unzip if needed and then you will need to run the following for each .whl file: + +.. code-block:: powershell + + python -m pip install "\path\to\destination\{whl_file.whl}" + +.. 
note:: If you have an issue installing any of the packages you can use the script + from MSTICPy repo "tools" folder + `install_all_whl_files.py `__ + to help. + +5. Test the installation by running some MSTICPy operations in a Jupyter notebook. + +If you are installing within a Jupyter Notebooks, you will need to upload your zip file/directory +containing all of the whl files. + +If you zipped your transfer archive and need to unzip source files, run the following: + +.. code-block:: python + + import zipfile + import os + import shutil + file_path = "./{zip_file_name}" + file_name = os.path.split(file_path)[-1] + file_name_without_ext = os.path.splitext(file_name)[0] + with zipfile.ZipFile(file_path, 'r') as zip_ref: + zip_ref.extractall(os.path.join(os.getcwd(), file_name_without_ext)) + + +- To install the whl files, run the following in a cell: + +.. code-block:: python + + import os + directory = "/path/to/whl/files/directory" # edit this to match your directory + files = [ + os.path.join(directory, filename) + for filename in os.listdir(directory) + if filename.endswith(".whl") + ] + for file in files: + filename = os.path.split(file)[-1] + print(f"\nAttempting to install {filename}") + %pip install --quiet --no-index --no-deps --find-links . {file} diff --git a/msticpy/_version.py b/msticpy/_version.py index 2333c41fe..28e81634a 100644 --- a/msticpy/_version.py +++ b/msticpy/_version.py @@ -1,2 +1,2 @@ """Version file.""" -VERSION = "2.7.0.pre1" +VERSION = "2.8.0.pre1" diff --git a/msticpy/context/azure/sentinel_watchlists.py b/msticpy/context/azure/sentinel_watchlists.py index ae54fc889..c0f59f784 100644 --- a/msticpy/context/azure/sentinel_watchlists.py +++ b/msticpy/context/azure/sentinel_watchlists.py @@ -328,5 +328,7 @@ def _check_watchlist_exists( """ # Check requested watchlist actually exists - existing_watchlists = self.list_watchlists()["name"].values - return watchlist_name in existing_watchlists + existing_watchlists = self.list_watchlists() + if existing_watchlists.empty: + return False + return watchlist_name in existing_watchlists["name"].values diff --git a/msticpy/context/tiproviders/intsights.py b/msticpy/context/tiproviders/intsights.py index b1c094e7c..b5bc1f620 100644 --- a/msticpy/context/tiproviders/intsights.py +++ b/msticpy/context/tiproviders/intsights.py @@ -46,42 +46,42 @@ class IntSights(HttpTIProvider): _QUERIES = { "ipv4": _IntSightsParams( - path="/public/v2/iocs/ioc-by-value", + path="/public/v3/iocs/ioc-by-value", params={"iocValue": "{observable}"}, headers=_DEF_HEADERS, ), "ipv6": _IntSightsParams( - path="/public/v2/iocs/ioc-by-value", + path="/public/v3/iocs/ioc-by-value", params={"iocValue": "{observable}"}, headers=_DEF_HEADERS, ), "dns": _IntSightsParams( - path="/public/v2/iocs/ioc-by-value", + path="/public/v3/iocs/ioc-by-value", params={"iocValue": "{observable}"}, headers=_DEF_HEADERS, ), "url": _IntSightsParams( - path="/public/v2/iocs/ioc-by-value", + path="/public/v3/iocs/ioc-by-value", params={"iocValue": "{observable}"}, headers=_DEF_HEADERS, ), "md5_hash": _IntSightsParams( - path="/public/v2/iocs/ioc-by-value", + path="/public/v3/iocs/ioc-by-value", params={"iocValue": "{observable}"}, headers=_DEF_HEADERS, ), "sha1_hash": _IntSightsParams( - path="/public/v2/iocs/ioc-by-value", + path="/public/v3/iocs/ioc-by-value", params={"iocValue": "{observable}"}, headers=_DEF_HEADERS, ), "sha256_hash": _IntSightsParams( - path="/public/v2/iocs/ioc-by-value", + path="/public/v3/iocs/ioc-by-value", params={"iocValue": "{observable}"}, 
headers=_DEF_HEADERS, ), "email": _IntSightsParams( - path="/public/v2/iocs/ioc-by-value", + path="/public/v3/iocs/ioc-by-value", params={"iocValue": "{observable}"}, headers=_DEF_HEADERS, ), @@ -111,27 +111,28 @@ def parse_results(self, response: Dict) -> Tuple[bool, ResultSeverity, Any]: ): return False, ResultSeverity.information, "Not found." - if response["RawResult"]["Whitelist"] == "True": + if response["RawResult"].get("whitelisted", False): return False, ResultSeverity.information, "Whitelisted." - sev = response["RawResult"]["Severity"] + sev = response["RawResult"].get("severity", "Low") result_dict = { - "threat_actors": response["RawResult"]["RelatedThreatActors"], - "geolocation": response["RawResult"].get("Geolocation", ""), + "threat_actors": response["RawResult"].get("relatedThreatActors", ""), + "geolocation": response["RawResult"].get("geolocation", None), "response_code": response["Status"], - "tags": response["RawResult"]["Tags"] + response["RawResult"]["SystemTags"], - "malware": response["RawResult"]["RelatedMalware"], - "campaigns": response["RawResult"]["RelatedCampaigns"], - "sources": response["RawResult"]["Sources"], - "score": response["RawResult"]["Score"], + "tags": response["RawResult"].get("tags", []) + + response["RawResult"].get("SystemTags", []), + "malware": response["RawResult"].get("relatedMalware", []), + "campaigns": response["RawResult"].get("relatedCampaigns", []), + "score": response["RawResult"].get("score", 0), "first_seen": dt.datetime.strptime( - response["RawResult"]["FirstSeen"], "%Y-%m-%dT%H:%M:%S.%fZ" + response["RawResult"].get("firstSeen", None), "%Y-%m-%dT%H:%M:%S.%fZ" ), "last_seen": dt.datetime.strptime( - response["RawResult"]["LastSeen"], "%Y-%m-%dT%H:%M:%S.%fZ" + response["RawResult"].get("lastSeen", None), "%Y-%m-%dT%H:%M:%S.%fZ" ), "last_update": dt.datetime.strptime( - response["RawResult"]["LastUpdate"], "%Y-%m-%dT%H:%M:%S.%fZ" + response["RawResult"].get("lastUpdateDate", None), + "%Y-%m-%dT%H:%M:%S.%fZ", ), } diff --git a/msticpy/data/drivers/cybereason_driver.py b/msticpy/data/drivers/cybereason_driver.py index ad8115fb3..e414e9d52 100644 --- a/msticpy/data/drivers/cybereason_driver.py +++ b/msticpy/data/drivers/cybereason_driver.py @@ -6,23 +6,30 @@ """Cybereason Driver class.""" import datetime as dt import json +import logging import re -from functools import singledispatch +from asyncio import as_completed, Future +from concurrent.futures import ThreadPoolExecutor +from functools import partial, singledispatch from typing import Any, Dict, List, Optional, Tuple, Union import httpx import pandas as pd +from tqdm.auto import tqdm from ..._version import VERSION from ...common.exceptions import MsticpyUserConfigError from ...common.provider_settings import ProviderArgs, get_provider_settings from ...common.utility import mp_ua_header from ..core.query_defns import Formatters +from ..core.query_provider_connections_mixin import _get_event_loop from .driver_base import DriverBase, DriverProps, QuerySource __version__ = VERSION __author__ = "Florian Bracq" +logger = logging.getLogger(__name__) + _HELP_URI = ( "https://msticpy.readthedocs.io/en/latest/data_acquisition/DataProviders.html" ) @@ -66,8 +73,9 @@ def __init__(self, **kwargs): """ super().__init__(**kwargs) timeout = kwargs.get("timeout", 120) # 2 minutes in milliseconds - max_results = min(kwargs.get("max_results", 1000), 10000) - page_size = min(kwargs.get("page_size", 100), 100) + logger.debug("Set timeout to %d", timeout) + max_results = 
min(kwargs.get("max_results", 100000), 100000) + logger.debug("Set maximum results to %d", max_results) self.base_url: str = "https://{tenant_id}.cybereason.net" self.auth_endpoint: str = "/login.html" self.req_body: Dict[str, Any] = { @@ -77,7 +85,6 @@ def __init__(self, **kwargs): "perFeatureLimit": 100, "templateContext": "SPECIFIC", "queryTimeout": timeout * 1000, - "pagination": {"pageSize": page_size}, "customFields": [], } self.search_endpoint: str = "/rest/visualsearch/query/simple" @@ -96,6 +103,10 @@ def __init__(self, **kwargs): }, ) + self.set_driver_property(DriverProps.SUPPORTS_THREADING, value=True) + self.set_driver_property( + DriverProps.MAX_PARALLEL, value=kwargs.get("max_threads", 4) + ) self._debug = kwargs.get("debug", False) def query( @@ -118,10 +129,91 @@ def query( the underlying provider result if an error. """ - data, response = self.query_with_results(query) - if isinstance(data, pd.DataFrame): - return data - return response + del query_source + if not self._connected: + raise self._create_not_connected_err(self.__class__.__name__) + + page_size = min(kwargs.get("page_size", 2000), 4000) + logger.debug("Set page size to %d", page_size) + json_query = json.loads(query) + body = {**self.req_body, **json_query} + + # The query must be executed at least once to retrieve the number + # of results and the pagination token. + response = self.__execute_query(body, page_size=page_size) + + total_results = response["data"]["totalResults"] + pagination_token = response["data"]["paginationToken"] + results: Dict[str, Any] = response["data"]["resultIdToElementDataMap"] + + logger.debug("Retrieved %d/%d results", len(results), total_results) + + df_result: pd.DataFrame = None + + if len(results) < total_results: + df_result = self._exec_paginated_queries( + body=body, + page_size=page_size, + pagination_token=pagination_token, + total_results=total_results, + ) + else: + df_result = self._format_result_to_dataframe(result=response) + + return df_result + + def _exec_paginated_queries( + self, + body: Dict[str, Any], + page_size: int, + pagination_token: str, + total_results: int, + **kwargs, + ) -> pd.DataFrame: + """ + Return results of paginated queries. + + Parameters + ---------- + body : Dict[str, Any] + The body of the query to execute. + + Additional Parameters + ---------- + progress: bool, optional + Show progress bar, by default True + retry_on_error: bool, optional + Retry failed queries, by default False + **kwargs : Dict[str, Any] + Additional keyword arguments to pass to the query method. + + Returns + ------- + pd.DataFrame + The concatenated results of all the paginated queries. + + Notes + ----- + This method executes the specified query multiple times to retrieve + all the data from paginated results. + The queries are executed asynchronously. 
+ + """ + progress = kwargs.pop("progress", True) + retry = kwargs.pop("retry_on_error", False) + + query_tasks = self._create_paginated_query_tasks( + body=body, + page_size=page_size, + pagination_token=pagination_token, + total_results=total_results, + ) + + logger.info("Running %s paginated queries.", len(query_tasks)) + event_loop = _get_event_loop() + return event_loop.run_until_complete( + self.__run_threaded_queries(query_tasks, progress, retry) + ) def connect( self, @@ -277,6 +369,136 @@ def _flatten_element_values( result[f"{key}.{subkey}"] = subvalues return result + def _create_paginated_query_tasks( + self, + body: Dict[str, Any], + page_size: int, + pagination_token: str, + total_results: int, + ) -> Dict[str, partial]: + """Return dictionary of partials to execute queries.""" + # Compute the number of queries to execute + total_pages = total_results // page_size + 1 + # The first query (page 0) as to be re-run due to a bug in + # Cybereason API. The first query returns less results than the page size + # when executed without a pagination token. + return { + f"{page}": partial( + self.__execute_query, + body=body, + page_size=page_size, + pagination_token=pagination_token, + page=page, + ) + for page in range(0, total_pages) + } + + def __execute_query( + self, + body: Dict[str, Any], + page: int = 0, + page_size: int = 2000, + pagination_token: str = None, + ) -> Dict[str, Any]: + """ + Run query with pagination enabled. + + Parameters + ---------- + body: Dict[str, Any] + Body of the HTTP Request + page_size: int + Size of the page for results + page: int + Page number to query + pagination_token: str + Token of the current search + + Returns + ------- + Dict[str, Any] + + """ + if pagination_token: + pagination = { + "pagination": { + "pageSize": page_size, + "page": page + 1, + "paginationToken": pagination_token, + "skip": page * page_size, + } + } + headers = {"Pagination-Token": pagination_token} + else: + pagination = {"pagination": {"pageSize": page_size}} + headers = {} + params = {"page": page, "itemsPerPage": page_size} + status = None + while status != "SUCCESS": + response = self.client.post( + self.search_endpoint, + json={**body, **pagination}, + headers=headers, + params=params, + ) + response.raise_for_status() + json_result = response.json() + status = json_result["status"] + return json_result + + async def __run_threaded_queries( + self, + query_tasks: Dict[str, partial], + progress: bool = True, + retry: bool = False, + ) -> pd.DataFrame: + logger.info("Running %d threaded queries.", len(query_tasks)) + event_loop = _get_event_loop() + with ThreadPoolExecutor(max_workers=4) as executor: + results: List[pd.DataFrame] = [] + failed_tasks: Dict[str, Future] = {} + thread_tasks = { + query_id: event_loop.run_in_executor(executor, query_func) + for query_id, query_func in query_tasks.items() + } + if progress: + task_iter = tqdm( + as_completed(thread_tasks.values()), + unit="paginated-queries", + desc="Running", + ) + else: + task_iter = as_completed(thread_tasks.values()) + ids_and_tasks = dict(zip(thread_tasks, task_iter)) + for query_id, thread_task in ids_and_tasks.items(): + try: + result = await thread_task + df_result = self._format_result_to_dataframe(result) + logger.info("Query task '%s' completed successfully.", query_id) + results.append(df_result) + except Exception: # pylint: disable=broad-except + logger.warning( + "Query task '%s' failed with exception", query_id, exc_info=True + ) + failed_tasks[query_id] = thread_task + + if retry and 
failed_tasks: + for query_id, thread_task in failed_tasks.items(): + try: + logger.info("Retrying query task '%s'", query_id) + result = await thread_task + df_result = self._format_result_to_dataframe(result) + results.append(df_result) + except Exception: # pylint: disable=broad-except + logger.warning( + "Retried query task '%s' failed with exception", + query_id, + exc_info=True, + ) + # Sort the results by the order of the tasks + results = [result for _, result in sorted(zip(thread_tasks, results))] + return pd.concat(results, ignore_index=True) + # pylint: disable=too-many-branches def query_with_results(self, query: str, **kwargs) -> Tuple[pd.DataFrame, Any]: """ @@ -294,64 +516,7 @@ def query_with_results(self, query: str, **kwargs) -> Tuple[pd.DataFrame, Any]: Kql ResultSet. """ - if not self.connected: - self.connect(self.current_connection) - if not self.connected: - raise ConnectionError( - "Source is not connected. ", "Please call connect() and retry." - ) - - if self._debug: - print(query) - - json_query = json.loads(query) - body = self.req_body - body.update(json_query) - response = self.client.post(self.search_endpoint, json=body) - - self._check_response_errors(response) - - json_response = response.json() - if json_response["status"] != "SUCCESS": - print( - "Warning - query did not complete successfully.", - f"Status: {json_response['status']}.", - json_response["message"], - ) - return pd.DataFrame(), json_response - - data = json_response.get("data", json_response) - results = data.get("resultIdToElementDataMap", data) - total_results = data.get("totalResults", len(results)) - guessed_results = data.get("guessedPossibleResults", len(results)) - if guessed_results > len(results): - print( - f"Warning - query returned {total_results} out of {guessed_results}.", - "Check returned response.", - ) - results = [ - dict(CybereasonDriver._flatten_result(values), **{"resultId": result_id}) - for result_id, values in results.items() - ] - - return pd.json_normalize(results), json_response - - # pylint: enable=too-many-branches - - @staticmethod - def _check_response_errors(response): - """Check the response for possible errors.""" - if response.status_code == httpx.codes.OK: - return - print(response.json()["error"]["message"]) - if response.status_code == 401: - raise ConnectionRefusedError( - "Authentication failed - possible ", "timeout. Please re-connect." - ) - # Raise an exception to handle hitting API limits - if response.status_code == 429: - raise ConnectionRefusedError("You have likely hit the API limit. 
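# Toy sketch of the final assembly step shown above: each page's response is
# normalized to a DataFrame and the pages are concatenated with a fresh index.
# The frames and column names here are stand-ins, not real driver output.
import pandas as pd

page_frames = [
    pd.DataFrame([{"resultId": "id1", "osType.values": ["WINDOWS"]}]),
    pd.DataFrame([{"resultId": "id2", "osType.values": ["WINDOWS"]}]),
]
combined = pd.concat(page_frames, ignore_index=True)   # one row per result id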
") - response.raise_for_status() + raise NotImplementedError(f"Not supported for {self.__class__.__name__}") # Parameter Formatting method @staticmethod @@ -373,6 +538,18 @@ def _format_to_datetime(timestamp: int) -> Union[dt.datetime, int]: except TypeError: return timestamp + @staticmethod + def _format_result_to_dataframe(result: Dict[str, Any]) -> pd.DataFrame: + """Return a dataframe from a cybereason result object.""" + df_result = [ + dict( + CybereasonDriver._flatten_result(values), + **{"resultId": result_id}, + ) + for result_id, values in result["data"]["resultIdToElementDataMap"].items() + ] + return pd.json_normalize(df_result) + # Retrieve configuration parameters with aliases @staticmethod def _map_config_dict_name(config_dict: Dict[str, str]): diff --git a/msticpy/data/drivers/splunk_driver.py b/msticpy/data/drivers/splunk_driver.py index 3b4c4a6e9..0754027ad 100644 --- a/msticpy/data/drivers/splunk_driver.py +++ b/msticpy/data/drivers/splunk_driver.py @@ -35,14 +35,14 @@ ) from imp_err __version__ = VERSION -__author__ = "Ashwin Patil" +__author__ = "Ashwin Patil, Tatsuya Hasegawa" logger = logging.getLogger(__name__) SPLUNK_CONNECT_ARGS = { "host": "(string) The host name (the default is 'localhost').", - "port": "(integer) The port number (the default is 8089).", + "port": "(string) The port number (the default is '8089').", "http_scheme": "('https' or 'http') The scheme for accessing the service " + "(the default is 'https').", "verify": "(Boolean) Enable (True) or disable (False) SSL verrification for " @@ -60,6 +60,7 @@ "username": "(string) The Splunk account username, which is used to " + "authenticate the Splunk instance.", "password": "(string) The password for the Splunk account.", + "splunkToken": "(string) The Authorization Bearer Token created in the Splunk.", } @@ -67,8 +68,8 @@ class SplunkDriver(DriverBase): """Driver to connect and query from Splunk.""" - _SPLUNK_REQD_ARGS = ["host", "username", "password"] - _CONNECT_DEFAULTS: Dict[str, Any] = {"port": 8089} + _SPLUNK_REQD_ARGS = ["host"] + _CONNECT_DEFAULTS: Dict[str, Any] = {"port": "8089"} _TIME_FORMAT = '"%Y-%m-%d %H:%M:%S.%6N"' def __init__(self, **kwargs): @@ -79,6 +80,7 @@ def __init__(self, **kwargs): self._connected = False if kwargs.get("debug", False): logger.setLevel(logging.DEBUG) + self._required_params = self._SPLUNK_REQD_ARGS self.set_driver_property( DriverProps.PUBLIC_ATTRS, @@ -142,7 +144,7 @@ def connect(self, connection_str: Optional[str] = None, **kwargs): help_uri="https://msticpy.readthedocs.io/en/latest/DataProviders.html", ) from err self._connected = True - print("connected") + print("Connected.") def _get_connect_args( self, connection_str: Optional[str], **kwargs @@ -172,12 +174,19 @@ def _get_connect_args( elif isinstance(verify_opt, bool): cs_dict["verify"] = verify_opt - missing_args = set(self._SPLUNK_REQD_ARGS) - cs_dict.keys() + # Different required parameters for the REST API authentication method + # between user/pass and authorization bearer token + if "username" in cs_dict: + self._required_params = ["host", "username", "password"] + else: + self._required_params = ["host", "splunkToken"] + + missing_args = set(self._required_params) - cs_dict.keys() if missing_args: raise MsticpyUserConfigError( "One or more connection parameters missing for Splunk connector", ", ".join(missing_args), - f"Required parameters are {', '.join(self._SPLUNK_REQD_ARGS)}", + f"Required parameters are {', '.join(self._required_params)}", "All parameters:", *[f"{arg}: {desc}" for arg, desc 
in SPLUNK_CONNECT_ARGS.items()], title="no Splunk connection parameters", diff --git a/msticpy/data/queries/m365d/kql_m365_hunting.yaml b/msticpy/data/queries/m365d/kql_m365_hunting.yaml index 69dfb3f36..54e27c7c3 100644 --- a/msticpy/data/queries/m365d/kql_m365_hunting.yaml +++ b/msticpy/data/queries/m365d/kql_m365_hunting.yaml @@ -8,12 +8,6 @@ defaults: metadata: data_source: 'hunting_queries' parameters: - start: - description: Query start time - type: datetime - end: - description: Query end time - type: datetime add_query_items: description: Additional query clauses type: str @@ -413,7 +407,7 @@ sources: makeset(Command), count(), min({time_column}) by AccountName, DeviceName, DeviceId | order by AccountName asc - | where min_Timestamp > ago(1d) + | where min_{time_column} > ago(1d) {add_query_items}' uri: "https://github.com/microsoft/WindowsDefenderATP-Hunting-Queries/blob/master/Lateral%20Movement/ServiceAccountsPerformingRemotePS.txt" accessibility_persistence: diff --git a/msticpy/data/queries/mde/kql_mdatp_hunting.yaml b/msticpy/data/queries/mde/kql_mdatp_hunting.yaml index 0e785ef8d..46f3fc04b 100644 --- a/msticpy/data/queries/mde/kql_mdatp_hunting.yaml +++ b/msticpy/data/queries/mde/kql_mdatp_hunting.yaml @@ -8,12 +8,6 @@ defaults: metadata: data_source: 'hunting_queries' parameters: - start: - description: Query start time - type: datetime - end: - description: Query end time - type: datetime add_query_items: description: Additional query clauses type: str diff --git a/tests/context/test_tiproviders.py b/tests/context/test_tiproviders.py index 2b9d722b8..f2166cd4a 100644 --- a/tests/context/test_tiproviders.py +++ b/tests/context/test_tiproviders.py @@ -885,32 +885,27 @@ def _get_riskiq_classification(): "https://api.ti.insight.rapid7.com": { "ioc_param": "params", "response": { - "Value": "124.5.6.7", - "Type": "IpAddresses", - "Score": 42, - "Severity": "Medium", - "Whitelist": False, - "FirstSeen": dt.datetime.strftime( + "value": "124.5.6.7", + "type": "IpAddresses", + "score": 42, + "severity": "Medium", + "whitelist": False, + "firstSeen": dt.datetime.strftime( dt.datetime.now(), "%Y-%m-%dT%H:%M:%S.%fZ" ), - "LastSeen": dt.datetime.strftime( + "lastSeen": dt.datetime.strftime( dt.datetime.now(), "%Y-%m-%dT%H:%M:%S.%fZ" ), - "LastUpdate": dt.datetime.strftime( + "lastUpdateDate": dt.datetime.strftime( dt.datetime.now(), "%Y-%m-%dT%H:%M:%S.%fZ" ), - "Sources": [ - {"ConfidenceLevel": 2, "Name": "Source A"}, - {"ConfidenceLevel": 1, "Name": "Source B"}, - {"ConfidenceLevel": 1, "Name": "Source C"}, - {"ConfidenceLevel": 3, "Name": "Source D"}, - ], - "SystemTags": ["bot", "malware related"], - "Geolocation": "FR", - "RelatedMalware": ["malware1"], - "RelatedCampaigns": ["Campaign A"], - "RelatedThreatActors": ["Threat Actor 00"], - "Tags": ["tag"], + "systemTags": ["bot", "malware related"], + "geolocation": "FR", + "relatedMalware": ["malware1"], + "relatedCampaigns": ["Campaign A"], + "relatedThreatActors": ["Threat Actor 00"], + "tags": ["tag"], + "whitelisted": False, }, }, "https://cti.api.crowdsec.net": { diff --git a/tests/data/drivers/test_cybereason_driver.py b/tests/data/drivers/test_cybereason_driver.py index c8684a00f..8e1e4a687 100644 --- a/tests/data/drivers/test_cybereason_driver.py +++ b/tests/data/drivers/test_cybereason_driver.py @@ -50,7 +50,9 @@ } }, } - } + }, + "paginationToken": None, + "totalResults": 1, }, "status": "SUCCESS", "message": "", @@ -58,6 +60,85 @@ "failures": 0, } +_CR_PAGINATED_RESULT = [ + { + "data": { + "resultIdToElementDataMap": 
{ + "id1": { + "simpleValues": { + "osType": {"totalValues": 1, "values": ["WINDOWS"]}, + "totalMemory": { + "totalValues": 1, + "values": ["8589463552"], + }, + "group": { + "totalValues": 1, + "values": ["00000000-0000-0000-0000-000000000000"], + }, + "osVersionType": { + "totalValues": 1, + "values": ["Windows_10"], + }, + }, + "elementValues": { + "users": { + "totalValues": 5, + "elementValues": [], + "totalSuspicious": 0, + "totalMalicious": 0, + "guessedTotal": 0, + } + }, + } + }, + "paginationToken": None, + "totalResults": 2, + }, + "status": "SUCCESS", + "message": "", + "expectedResults": 0, + "failures": 0, + }, + { + "data": { + "resultIdToElementDataMap": { + "id2": { + "simpleValues": { + "osType": {"totalValues": 1, "values": ["WINDOWS"]}, + "totalMemory": { + "totalValues": 1, + "values": ["8589463552"], + }, + "group": { + "totalValues": 1, + "values": ["00000000-0000-0000-0000-000000000000"], + }, + "osVersionType": { + "totalValues": 1, + "values": ["Windows_10"], + }, + }, + "elementValues": { + "users": { + "totalValues": 5, + "elementValues": [], + "totalSuspicious": 0, + "totalMalicious": 0, + "guessedTotal": 0, + } + }, + } + }, + "paginationToken": None, + "totalResults": 2, + }, + "status": "SUCCESS", + "message": "", + "expectedResults": 0, + "failures": 0, + }, +] + _CR_QUERY = { "query": """ { @@ -132,9 +213,9 @@ def _cr_pre_checks(driver: CybereasonDriver): @respx.mock def test_connect(driver): """Test connect.""" - connect = respx.post(re.compile(r"https://.*.cybereason.net/login.html")).respond( - 200 - ) + connect = respx.post( + re.compile(r"^https://[a-zA-Z0-9\-]+\.cybereason\.net/login\.html") + ).respond(200) with custom_mp_config(MP_PATH): driver.connect() check.is_true(connect.called) @@ -144,19 +225,49 @@ def test_connect(driver): @respx.mock def test_query(driver): """Test query calling returns data in expected format.""" - connect = respx.post(re.compile(r"https://.*.cybereason.net/login.html")).respond( - 200 - ) + connect = respx.post( + re.compile(r"^https://[a-zA-Z0-9\-]+\.cybereason\.net/login\.html") + ).respond(200) query = respx.post( - re.compile(r"https://.*.cybereason.net/rest/visualsearch/query/simple") + re.compile( + r"^https://[a-zA-Z0-9\-]+\.cybereason\.net/rest/visualsearch/query/simple" + ) ).respond(200, json=_CR_RESULT) with custom_mp_config(MP_PATH): + driver.connect() data = driver.query('{"test": "test"}') check.is_true(connect.called or driver.connected) check.is_true(query.called) check.is_instance(data, pd.DataFrame) +@respx.mock +def test_paginated_query(driver): + """Test query calling returns data in expected format.""" + connect = respx.post( + re.compile(r"^https://[a-zA-Z0-9\-]+\.cybereason\.net/login.html") + ).respond(200) + query1 = respx.post( + re.compile( + r"^https://[a-zA-Z0-9\-]+\.cybereason\.net/rest/visualsearch/query/simple" + ), + params={"page": 0}, + ).respond(200, json=_CR_PAGINATED_RESULT[0]) + query2 = respx.post( + re.compile( + r"^https://[a-zA-Z0-9\-]+\.cybereason\.net/rest/visualsearch/query/simple" + ), + params={"page": 1}, + ).respond(200, json=_CR_PAGINATED_RESULT[1]) + with custom_mp_config(MP_PATH): + driver.connect() + data = driver.query('{"test": "test"}', page_size=1) + check.is_true(connect.called or driver.connected) + check.is_true(query1.called) + check.is_true(query2.called) + check.is_instance(data, pd.DataFrame) + + def test_custom_param_handler(driver): """Test query formatter returns data in expected format.""" query = _CR_QUERY.get("query", "") diff --git 
a/tools/build_wheel_from_targz.py b/tools/build_wheel_from_targz.py new file mode 100644 index 000000000..266030aa6 --- /dev/null +++ b/tools/build_wheel_from_targz.py @@ -0,0 +1,47 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +"""Script to create PIP Wheels from tar.gz files.""" + +import argparse +import os + +VERSION = "1.0.0" + +__version__ = VERSION +__author__ = "Chris Cianelli" + + +def build_wheel_from_targz(directory: str): + """ + Build wheel files from tar.gz files in a directory. + + Parameters + ---------- + directory: str + Directory containing tar.gz files + + """ + files = [ + os.path.join(directory, filename) + for filename in os.listdir(directory) + if filename.endswith(".tar.gz") + ] + for file in files: + os.system(f"python -m pip wheel {file} -w {directory}") # nosec + os.remove(file) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Build wheel files from tar.gz files in a directory" + ) + parser.add_argument( + "-d", "--directory", help="Directory for saved zip file", required=True + ) + + args = parser.parse_args() + + build_wheel_from_targz(args.directory) diff --git a/tools/download_python_package.py b/tools/download_python_package.py new file mode 100644 index 000000000..aa7b26e87 --- /dev/null +++ b/tools/download_python_package.py @@ -0,0 +1,139 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +"""Create MSTICPy install archive using docker.""" +import argparse +import os +import subprocess +import time + +VERSION = "1.0.0" + +__version__ = VERSION +__author__ = "Chris Cianelli" + + +# pylint: disable=subprocess-run-check + + +def download_python_package( + python_version: str, package_name: str, package_version: str, host_directory: str +): + """ + Download a Python package and its dependencies for local use. + + Parameters + ---------- + python_version: str + Python version to use. Ex: "3.8.5" or "3.9" + package_name: str + Name of the module to download. Ex: "msticpy" + package_version: str + Version of the module to download. 
Ex: "1.0.0" + host_directory: str + Directory containing tar.gz files + + """ + os.makedirs(host_directory, exist_ok=True) + try: + # Generate a unique tag based on the current timestamp + image_tag = f"{python_version}:{int(time.time())}" + + # get base name if module name includes additional dependencies + module_base_name = package_name.split("[")[0] + + pipstring = ( + f"{package_name}=={package_version}" if package_version else package_name + ) + + # Define Dockerfile content + dockerfile_content = f""" + FROM python:{python_version} + + WORKDIR /{python_version} + + RUN apt-get update && \\ + apt-get install -y zip && \\ + rm -rf /var/lib/apt/lists/* + + ENV PACKAGE_NAME="{package_name}" + ENV PIP_STRING="{pipstring}" + + RUN pip download "$PIP_STRING" -d /{python_version} + + RUN for file in *.tar.gz; do \\ + if [ -f "$file" ]; then \\ + pip wheel "$file" -w /{python_version}; \\ + rm -f "$file"; \\ + fi; \\ + done + + + RUN zip -j /{python_version}/py{python_version}_$PACKAGE_NAME.zip /{python_version}/*.whl + + # Remove the wheel files + RUN rm -f /{python_version}/*.whl + RUN rm -f /{python_version}/*.tar.gz + + ENTRYPOINT ["echo", "Docker tasks completed."] + """ + + # Write Dockerfile content to a file + with open("Dockerfile", "w", encoding="utf-8") as dockerfile: + dockerfile.write(dockerfile_content) + + # Build Docker image with a unique tag + docker_build_cmd = ["docker", "build", "-t", image_tag, "."] + subprocess.run(docker_build_cmd, check=True) # nosec + + # Run Docker container, copy files to temporary directory, and remove it after it's done + docker_run_cmd = [ + "docker", + "run", + "-v", + f"./{python_version}:/{python_version}", # Bind-mount the temporary directory + "--name", + f"{module_base_name}", + image_tag, + ] + subprocess.run(docker_run_cmd, check=True) + + print("copying files") + + subprocess.run( # nosec + ["docker", "cp", f"{module_base_name}:/{python_version}", host_directory], + check=True, + ) + + print("removing container") + + finally: + # Delete the Docker volume + subprocess.run(["docker", "rm", f"{module_base_name}"]) # nosec + + subprocess.run(["docker", "volume", "rm", f"{python_version}"]) # nosec + + # Delete the Docker image + subprocess.run(["docker", "rmi", image_tag]) # nosec + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Download Python package for local use" + ) + parser.add_argument("-v", "--python-version", help="Python version", required=True) + parser.add_argument( + "-d", "--directory", help="Directory for saved zip file", required=True + ) + parser.add_argument("-p", "--package-name", help="Package name", required=True) + parser.add_argument( + "-pv", "--package-version", help="Package version", required=False + ) + + args = parser.parse_args() + + download_python_package( + args.python_version, args.package_name, args.package_version, args.directory + ) diff --git a/tools/install_all_whl_files.py b/tools/install_all_whl_files.py new file mode 100644 index 000000000..5f7afa97a --- /dev/null +++ b/tools/install_all_whl_files.py @@ -0,0 +1,49 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+# -------------------------------------------------------------------------- +"""Script for installing wheel files to isolated environment.""" + +import argparse +import os + +VERSION = "1.0.0" + +__version__ = VERSION +__author__ = "Chris Cianelli" + + +def install_all_whl_files(directory: str): + """ + Install all wheel files in a directory. + + Parameters + ---------- + directory: str + Directory containing wheel files + + """ + files = [ + os.path.join(directory, filename) + for filename in os.listdir(directory) + if filename.endswith(".whl") + ] + for file in files: + os.system( # nosec + f"python -m pip install --quiet --no-index --no-deps --find-links . {file} --user" + ) + print(f"Installed {os.path.split(file)[-1]}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Build wheel files from tar.gz files in a directory" + ) + parser.add_argument( + "-d", "--directory", help="Directory for saved zip file", required=True + ) + + args = parser.parse_args() + + install_all_whl_files(args.directory)
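# Hedged end-to-end sketch of the offline-install workflow these tools support
# (Python version, package name, and all paths are placeholders): download and
# wheel the package inside docker, extract the zip of wheels it produces, then
# install every wheel on the target machine without contacting an index.
import subprocess
import zipfile

subprocess.run(
    ["python", "tools/download_python_package.py",
     "-v", "3.10", "-p", "msticpy", "-d", "./offline-packages"],
    check=True,
)
# The docker step leaves a py<version>_<package>.zip of wheels in the copied
# directory; extract it somewhere the target environment can read.
with zipfile.ZipFile("./offline-packages/3.10/py3.10_msticpy.zip") as wheels_zip:
    wheels_zip.extractall("./offline-packages/wheels")
subprocess.run(
    ["python", "tools/install_all_whl_files.py", "-d", "./offline-packages/wheels"],
    check=True,
)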