Commit
Merge branch 'main' into cherry-pick-ce-commit-966eb99716a638cea4b03366fdc377e37d4150e3
macintushar committed Sep 5, 2024
2 parents c840e86 + 0cf9c09 commit 2accf47
Showing 256 changed files with 7,177 additions and 1,920 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/integrations-ci.yml
@@ -31,6 +31,14 @@ jobs:
          sudo mv libduckdb/libduckdb.so /usr/local/lib
          sudo ldconfig /usr/local/lib
+     - name: Download and Install Oracle Instant Client
+       run: |
+         sudo apt-get install -y libaio1 alien
+         wget http://yum.oracle.com/repo/OracleLinux/OL7/oracle/instantclient/x86_64/getPackage/oracle-instantclient19.6-basic-19.6.0.0.0-1.x86_64.rpm
+         wget http://yum.oracle.com/repo/OracleLinux/OL7/oracle/instantclient/x86_64/getPackage/oracle-instantclient19.6-devel-19.6.0.0.0-1.x86_64.rpm
+         sudo alien -i --scripts oracle-instantclient*.rpm
+         rm -f oracle-instantclient*.rpm
      - name: Install dependencies
        run: |
          gem install bundler
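The Oracle Instant Client step exists because the ruby-oci8 gem added to integrations/Gemfile further down compiles against Oracle's client libraries; the same block is added to integrations-main.yml and server-ci.yml below. A minimal local sanity check, assuming the gem and client are installed (credentials and DSN here are placeholders, not values from this commit):

```ruby
# ruby-oci8 loads only if libclntsh from the Instant Client is resolvable
# (the alien-installed RPMs register it with the dynamic linker).
require "oci8"

# Hypothetical credentials and service name, purely for illustration.
conn = OCI8.new("app_user", "app_password", "//db.example.com:1521/ORCLPDB1")
conn.exec("SELECT 1 FROM dual") { |row| puts row.first } # prints 1 on success
conn.logout
```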
8 changes: 8 additions & 0 deletions .github/workflows/integrations-main.yml
@@ -39,6 +39,14 @@ jobs:
          sudo mv libduckdb/libduckdb.so /usr/local/lib
          sudo ldconfig /usr/local/lib
+     - name: Download and Install Oracle Instant Client
+       run: |
+         sudo apt-get install -y libaio1 alien
+         wget http://yum.oracle.com/repo/OracleLinux/OL7/oracle/instantclient/x86_64/getPackage/oracle-instantclient19.6-basic-19.6.0.0.0-1.x86_64.rpm
+         wget http://yum.oracle.com/repo/OracleLinux/OL7/oracle/instantclient/x86_64/getPackage/oracle-instantclient19.6-devel-19.6.0.0.0-1.x86_64.rpm
+         sudo alien -i --scripts oracle-instantclient*.rpm
+         rm -f oracle-instantclient*.rpm
      - name: Install dependencies
        run: bundle install
        working-directory: ./integrations
8 changes: 8 additions & 0 deletions .github/workflows/server-ci.yml
@@ -50,6 +50,14 @@ jobs:
          sudo mv libduckdb/libduckdb.so /usr/local/lib
          sudo ldconfig /usr/local/lib
+     - name: Download and Install Oracle Instant Client
+       run: |
+         sudo apt-get install -y libaio1 alien
+         wget http://yum.oracle.com/repo/OracleLinux/OL7/oracle/instantclient/x86_64/getPackage/oracle-instantclient19.6-basic-19.6.0.0.0-1.x86_64.rpm
+         wget http://yum.oracle.com/repo/OracleLinux/OL7/oracle/instantclient/x86_64/getPackage/oracle-instantclient19.6-devel-19.6.0.0.0-1.x86_64.rpm
+         sudo alien -i --scripts oracle-instantclient*.rpm
+         rm -f oracle-instantclient*.rpm
      - name: Bundle Install
        run: bundle install
        working-directory: ./server
6 changes: 6 additions & 0 deletions integrations/Gemfile
@@ -67,6 +67,12 @@ gem "mysql2"

gem "aws-sdk-sts"

+gem "ruby-oci8", "~> 2.2.12"
+
+gem "aws-sdk-sagemaker"
+
+gem "aws-sdk-sagemakerruntime"
+
group :development, :test do
  gem "simplecov", require: false
  gem "simplecov_json_formatter", require: false
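ruby-oci8 backs the new OracleDB connectors, while the two SageMaker gems support the aws_sagemaker_model source registered later in this diff. A hedged sketch of the call such a source would make; the region, endpoint name, and payload are placeholders, and the connector would take credentials from its connection configuration:

```ruby
require "aws-sdk-sagemakerruntime"

client = Aws::SageMakerRuntime::Client.new(region: "us-east-1") # placeholder region
response = client.invoke_endpoint(
  endpoint_name: "my-model-endpoint", # hypothetical endpoint
  content_type: "application/json",
  body: { inputs: [[1.0, 2.0, 3.0]] }.to_json
)
puts response.body.read # prediction payload; its shape depends on the model
```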
19 changes: 18 additions & 1 deletion integrations/Gemfile.lock
@@ -7,12 +7,14 @@ GIT
PATH
  remote: .
  specs:
-    multiwoven-integrations (0.5.2)
+    multiwoven-integrations (0.10.0)
      activesupport
      async-websocket
      aws-sdk-athena
+      aws-sdk-cloudwatchlogs
      aws-sdk-s3
      aws-sdk-sts
+      aws-sigv4
      csv
      dry-schema
      dry-struct
@@ -28,6 +30,7 @@ PATH
      rake
      restforce
      ruby-limiter
+      ruby-oci8
      ruby-odbc
      rubyzip
      sequel
@@ -66,6 +69,9 @@ GEM
    aws-sdk-athena (1.83.0)
      aws-sdk-core (~> 3, >= 3.193.0)
      aws-sigv4 (~> 1.1)
+    aws-sdk-cloudwatchlogs (1.82.0)
+      aws-sdk-core (~> 3, >= 3.193.0)
+      aws-sigv4 (~> 1.1)
    aws-sdk-core (3.196.1)
      aws-eventstream (~> 1, >= 1.3.0)
      aws-partitions (~> 1, >= 1.651.0)
@@ -78,6 +84,12 @@ GEM
      aws-sdk-core (~> 3, >= 3.194.0)
      aws-sdk-kms (~> 1)
      aws-sigv4 (~> 1.8)
+    aws-sdk-sagemaker (1.229.0)
+      aws-sdk-core (~> 3, >= 3.188.0)
+      aws-sigv4 (~> 1.1)
+    aws-sdk-sagemakerruntime (1.63.0)
+      aws-sdk-core (~> 3, >= 3.193.0)
+      aws-sigv4 (~> 1.1)
    aws-sdk-sts (1.11.0)
      aws-sdk-core (~> 3, >= 3.110.0)
      aws-sigv4 (~> 1.1)
@@ -275,6 +287,8 @@ GEM
    rubocop-ast (1.31.3)
      parser (>= 3.3.1.0)
    ruby-limiter (2.3.0)
+    ruby-oci8 (2.2.12)
+    ruby-oci8 (2.2.12-x64-mingw-ucrt)
    ruby-progressbar (1.13.0)
    ruby2_keywords (0.0.5)
    rubyzip (2.3.2)
@@ -335,6 +349,8 @@ DEPENDENCIES
  async-websocket (~> 0.8.0)
  aws-sdk-athena
  aws-sdk-s3
+  aws-sdk-sagemaker
+  aws-sdk-sagemakerruntime
  aws-sdk-sts
  byebug
  csv
@@ -357,6 +373,7 @@ DEPENDENCIES
  rspec (~> 3.0)
  rubocop (~> 1.21)
  ruby-limiter
+  ruby-oci8 (~> 2.2.12)
  ruby-odbc!
  rubyzip
  sequel
4 changes: 4 additions & 0 deletions integrations/README.md
@@ -59,6 +59,10 @@ Before you begin the installation, ensure you have the following dependencies in
  - Command: `brew install openssl@3`
  - Description: Essential for secure communication.

+- **Oracle Instant Client**
+  - Download Link: https://www.oracle.com/database/technologies/instant-client/downloads.html
+  - Description: Required for database interactions.
+

### Installation
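For Linux hosts, the equivalent non-interactive install is the one used by this commit's CI workflows above:

```bash
sudo apt-get install -y libaio1 alien
wget http://yum.oracle.com/repo/OracleLinux/OL7/oracle/instantclient/x86_64/getPackage/oracle-instantclient19.6-basic-19.6.0.0.0-1.x86_64.rpm
wget http://yum.oracle.com/repo/OracleLinux/OL7/oracle/instantclient/x86_64/getPackage/oracle-instantclient19.6-devel-19.6.0.0.0-1.x86_64.rpm
sudo alien -i --scripts oracle-instantclient*.rpm
rm -f oracle-instantclient*.rpm
```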
8 changes: 8 additions & 0 deletions integrations/lib/multiwoven/integrations.rb
@@ -31,6 +31,9 @@
require "duckdb"
require "iterable-api-client"
require "aws-sdk-sts"
+require "ruby-oci8"
+require "aws-sdk-sagemaker"
+require "aws-sdk-sagemakerruntime"

# Service
require_relative "integrations/config"
@@ -60,6 +63,9 @@
require_relative "integrations/source/clickhouse/client"
require_relative "integrations/source/amazon_s3/client"
require_relative "integrations/source/maria_db/client"
+require_relative "integrations/source/oracle_db/client"
+require_relative "integrations/source/databrics_model/client"
+require_relative "integrations/source/aws_sagemaker_model/client"

# Destination
require_relative "integrations/destination/klaviyo/client"
@@ -78,6 +84,8 @@
require_relative "integrations/destination/iterable/client"
require_relative "integrations/destination/maria_db/client"
require_relative "integrations/destination/databricks_lakehouse/client"
+require_relative "integrations/destination/oracle_db/client"
+require_relative "integrations/destination/microsoft_excel/client"

module Multiwoven
  module Integrations
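Each require_relative above registers a connector class whose module path mirrors the file path. A hedged sketch of touching one of the newly wired connectors; the exact constant name and the connector_spec helper are assumptions based on the existing connectors' naming pattern, not code from this commit:

```ruby
require "multiwoven/integrations"

# Assumed constant, following the source/destination path => module convention.
client = Multiwoven::Integrations::Destination::MicrosoftExcel::Client.new
puts client.connector_spec.to_h # metadata loaded from the connector's config JSON
```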
11 changes: 10 additions & 1 deletion integrations/lib/multiwoven/integrations/core/base_connector.rb
@@ -63,7 +63,16 @@ def success_status
      end

      def failure_status(error)
-        ConnectionStatus.new(status: ConnectionStatusType["failed"], message: error.message).to_multiwoven_message
+        message = error&.message || "failed"
+        ConnectionStatus.new(status: ConnectionStatusType["failed"], message: message).to_multiwoven_message
      end
+
+      def auth_headers(access_token)
+        {
+          "Accept" => "application/json",
+          "Authorization" => "Bearer #{access_token}",
+          "Content-Type" => "application/json"
+        }
+      end
    end
  end
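With auth_headers promoted to the base class (the Airtable and Facebook connectors below delete their private copies) and failure_status now nil-safe, a typical connector can combine the helpers like this. A sketch only: the endpoint, config key, and HttpClient keyword arguments are assumptions inferred from call sites elsewhere in this diff:

```ruby
# Hypothetical connection check built on the shared helpers.
def check_connection(connection_config)
  response = Multiwoven::Integrations::Core::HttpClient.request(
    "https://api.example.com/v1/me", # placeholder health endpoint
    Multiwoven::Integrations::Core::Constants::HTTP_GET,
    headers: auth_headers(connection_config[:api_token])
  )
  return success_status if response.is_a?(Net::HTTPSuccess)

  failure_status(nil) # nil-safe: falls back to the generic "failed" message
rescue StandardError => e
  failure_status(e)
end
```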
16 changes: 14 additions & 2 deletions integrations/lib/multiwoven/integrations/core/constants.rb
@@ -36,14 +36,26 @@ module Constants
    AIRTABLE_BASES_ENDPOINT = "https://api.airtable.com/v0/meta/bases"
    AIRTABLE_GET_BASE_SCHEMA_ENDPOINT = "https://api.airtable.com/v0/meta/bases/{baseId}/tables"

-    AWS_ACCESS_KEY_ID = ENV["AWS_ACCESS_KEY_ID"]
-    AWS_SECRET_ACCESS_KEY = ENV["AWS_SECRET_ACCESS_KEY"]
+    MS_EXCEL_AUTH_ENDPOINT = "https://graph.microsoft.com/v1.0/me"
+    MS_EXCEL_TABLE_ROW_WRITE_API = "https://graph.microsoft.com/v1.0/drives/%<drive_id>s/items/%<item_id>s/"\
+                                   "workbook/worksheets/%<sheet_name>s/tables/%<table_name>s/rows"
+    MS_EXCEL_TABLE_API = "https://graph.microsoft.com/v1.0/drives/%<drive_id>s/items/%<item_id>s/workbook/"\
+                         "worksheets/%<sheet_name>s/tables?$select=name"
+    MS_EXCEL_FILES_API = "https://graph.microsoft.com/v1.0/drives/%<drive_id>s/root/children"
+    MS_EXCEL_WORKSHEETS_API = "https://graph.microsoft.com/v1.0/drives/%<drive_id>s/items/%<item_id>s/"\
+                              "workbook/worksheets"
+    MS_EXCEL_SHEET_RANGE_API = "https://graph.microsoft.com/v1.0/drives/%<drive_id>s/items/%<item_id>s/"\
+                               "workbook/worksheets/%<sheet_name>s/range(address='A1:Z1')/usedRange?$select=values"
+
+    DATABRICKS_HEALTH_URL = "https://%<databricks_host>s/api/2.0/serving-endpoints/%<endpoint_name>s"
+    DATABRICKS_SERVING_URL = "https://%<databricks_host>s/serving-endpoints/%<endpoint_name>s/invocations"

    # HTTP
    HTTP_GET = "GET"
    HTTP_POST = "POST"
    HTTP_PUT = "PUT"
    HTTP_DELETE = "DELETE"
+    HTTP_PATCH = "PATCH"

    # google sheets
    GOOGLE_SHEETS_SCOPE = "https://www.googleapis.com/auth/drive"
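The %<name>s tokens in the endpoint templates above are Ruby format-string references, filled in with Kernel#format. A small sketch with made-up identifiers:

```ruby
include Multiwoven::Integrations::Core::Constants

# Drive, item, sheet, and table IDs below are illustrative placeholders.
url = format(
  MS_EXCEL_TABLE_ROW_WRITE_API,
  drive_id: "b!3xample", item_id: "01ABCDEF",
  sheet_name: "Sheet1", table_name: "Table1"
)
# => ".../drives/b!3xample/items/01ABCDEF/workbook/worksheets/Sheet1/tables/Table1/rows"
```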
3 changes: 2 additions & 1 deletion integrations/lib/multiwoven/integrations/core/http_client.rb
@@ -19,13 +19,14 @@ def build_request(method, uri, payload, headers)
                        when Constants::HTTP_GET then Net::HTTP::Get
                        when Constants::HTTP_POST then Net::HTTP::Post
                        when Constants::HTTP_PUT then Net::HTTP::Put
+                        when Constants::HTTP_PATCH then Net::HTTP::Patch
                        when Constants::HTTP_DELETE then Net::HTTP::Delete
                        else raise ArgumentError, "Unsupported HTTP method: #{method}"
                        end

        request = request_class.new(uri)
        headers.each { |key, value| request[key] = value }
-        request.body = payload.to_json if payload && %w[POST PUT].include?(method.upcase)
+        request.body = payload.to_json if payload && %w[POST PUT PATCH].include?(method.upcase)
        request
      end
    end
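With the new branch in place, PATCH bodies are JSON-serialized exactly like POST and PUT. A hedged sketch of a call that exercises it; the URL and payload are placeholders, and the request keyword names are assumed from the Airtable call site below:

```ruby
# Hypothetical PATCH, e.g. renaming a Microsoft Graph drive item.
response = Multiwoven::Integrations::Core::HttpClient.request(
  "https://graph.microsoft.com/v1.0/drives/DRIVE_ID/items/ITEM_ID", # placeholder
  Multiwoven::Integrations::Core::Constants::HTTP_PATCH,
  payload: { name: "renamed-report.xlsx" },
  headers: { "Authorization" => "Bearer TOKEN", "Content-Type" => "application/json" }
)
```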
integrations/lib/multiwoven/integrations/destination/airtable/client.rb
@@ -59,10 +59,12 @@ def write(sync_config, records, _action = "create")
        connection_config = sync_config.destination.connection_specification.with_indifferent_access
        api_key = connection_config[:api_key]
        url = sync_config.stream.url
+        log_message_array = []
        write_success = 0
        write_failure = 0
        records.each_slice(MAX_CHUNK_SIZE) do |chunk|
          payload = create_payload(chunk)
+          args = [sync_config.stream.request_method, url, payload]
          response = Multiwoven::Integrations::Core::HttpClient.request(
            url,
            sync_config.stream.request_method,
@@ -74,6 +76,7 @@
          else
            write_failure += chunk.size
          end
+          log_message_array << log_request_response("info", args, response)
        rescue StandardError => e
          handle_exception(e, {
            context: "AIRTABLE:RECORD:WRITE:EXCEPTION",
@@ -82,13 +85,9 @@
            sync_run_id: sync_config.sync_run_id
          })
          write_failure += chunk.size
+          log_message_array << log_request_response("error", args, e.message)
        end
-
-        tracker = Multiwoven::Integrations::Protocol::TrackingMessage.new(
-          success: write_success,
-          failed: write_failure
-        )
-        tracker.to_multiwoven_message
+        tracking_message(write_success, write_failure, log_message_array)
      rescue StandardError => e
        handle_exception(e, {
          context: "AIRTABLE:RECORD:WRITE:EXCEPTION",
@@ -110,16 +109,8 @@ def create_payload(records)
        }
      end

-      def auth_headers(access_token)
-        {
-          "Accept" => "application/json",
-          "Authorization" => "Bearer #{access_token}",
-          "Content-Type" => "application/json"
-        }
-      end
-
      def base_id_exists?(bases, base_id)
-        return if extract_data(bases).any? { |base| base["id"] == base_id }
+        return if extract_bases(bases).any? { |base| base["id"] == base_id }

        raise ArgumentError, "base_id not found"
      end
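tracking_message and log_request_response are shared core helpers from this commit on; Google Sheets below switches to them too and drops its local tracking_message. Their implementations are outside this diff, so the following is only a sketch of what the call sites imply, with the LogMessage shape and the logs field assumed:

```ruby
# Assumed shapes, inferred from call sites in this diff; the real core
# implementations may differ.
def log_request_response(level, request, response)
  Multiwoven::Integrations::Protocol::LogMessage.new(
    name: self.class.name,
    level: level,
    message: { request: request, response: response }.to_json
  )
end

def tracking_message(success, failure, log_message_array)
  Multiwoven::Integrations::Protocol::TrackingMessage.new(
    success: success, failed: failure, logs: log_message_array
  ).to_multiwoven_message
end
```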
integrations/lib/multiwoven/integrations/destination/airtable/config/meta.json
@@ -1,7 +1,7 @@
{
  "data": {
    "name": "Airtable",
-    "title": "airtable",
+    "title": "Airtable",
    "connector_type": "destination",
    "category": "Productivity Tools",
    "documentation_url": "https://docs.multiwoven.com/destinations/productivity-tools/airtable",
integrations/lib/multiwoven/integrations/destination/databricks_lakehouse/config/meta.json
@@ -1,9 +1,9 @@
{
  "data": {
    "name": "DatabricksLakehouse",
-    "title": "Databricks Lakehouse",
+    "title": "Databricks Data Warehouse",
    "connector_type": "destination",
-    "category": "Marketing Automation",
+    "category": "Database",
    "documentation_url": "https://docs.multiwoven.com/destinations/databricks_lakehouse",
    "github_issue_label": "destination-databricks-lakehouse",
    "icon": "icon.svg",
integrations/lib/multiwoven/integrations/destination/databricks_lakehouse/config/spec.json
@@ -3,7 +3,7 @@
  "stream_type": "static",
  "connection_specification": {
    "$schema": "http://json-schema.org/draft-07/schema#",
-    "title": "Databricks Lakehouse",
+    "title": "Databricks Datawarehouse",
    "type": "object",
    "required": ["host", "api_token", "warehouse_id", "catalog", "schema"],
    "properties": {
integrations/lib/multiwoven/integrations/destination/facebook_custom_audience/client.rb
@@ -110,14 +110,6 @@ def extract_schema_and_data(records, json_schema)
        [schema, data]
      end

-      def auth_headers(access_token)
-        {
-          "Accept" => "application/json",
-          "Authorization" => "Bearer #{access_token}",
-          "Content-Type" => "application/json"
-        }
-      end
-
      def ad_account_exists?(response, ad_account_id)
        return if extract_data(response).any? { |ad_account| ad_account["id"] == "act_#{ad_account_id}" }

integrations/lib/multiwoven/integrations/destination/facebook_custom_audience/config/meta.json
@@ -3,7 +3,7 @@
    "name": "FacebookCustomAudience",
    "title": "Facebook Custom Audiences",
    "connector_type": "destination",
-    "category": "Adtech",
+    "category": "Ad-Tech",
    "documentation_url": "https://docs.mutliwoven.com",
    "github_issue_label": "destination-facebook",
    "icon": "icon.svg",
integrations/lib/multiwoven/integrations/destination/google_sheets/client.rb
@@ -153,13 +153,15 @@ def extract_spreadsheet_id(link)

      # Batch has a limit of sending 2MB data. So creating a chunk of records to meet that limit
      def process_record_chunks(records, sync_config)
+        log_message_array = []
        write_success = 0
        write_failure = 0

        records.each_slice(MAX_CHUNK_SIZE) do |chunk|
          values = prepare_chunk_values(chunk, sync_config.stream)
-          update_sheet_values(values, sync_config.stream.name)
+          request, response = *update_sheet_values(values, sync_config.stream.name)
          write_success += values.size
+          log_message_array << log_request_response("info", request, response)
        rescue StandardError => e
          handle_exception(e, {
            context: "GOOGLE_SHEETS:RECORD:WRITE:EXCEPTION",
@@ -168,9 +170,9 @@ def process_record_chunks(records, sync_config)
            sync_run_id: sync_config.sync_run_id
          })
          write_failure += chunk.size
+          log_message_array << log_request_response("error", request, e.message)
        end

-        tracking_message(write_success, write_failure)
+        tracking_message(write_success, write_failure, log_message_array)
      end

      # We need to format the data to adhere to google sheets API format. This converts the sync mapped data to 2D array format expected by google sheets API
@@ -199,19 +201,14 @@ def update_sheet_values(values, stream_name)
        )

        # TODO: Remove & this is added for the test to pass we need
-        @client&.batch_update_values(@spreadsheet_id, batch_update_request)
+        response = @client&.batch_update_values(@spreadsheet_id, batch_update_request)
+        [batch_update_request, response]
      end

      def load_catalog
        read_json(CATALOG_SPEC_PATH)
      end

-      def tracking_message(success, failure)
-        Multiwoven::Integrations::Protocol::TrackingMessage.new(
-          success: success, failed: failure
-        ).to_multiwoven_message
-      end
-
      def delete_extra_sheets(sheet_ids)
        # Leave one sheet intact as a spreadsheet must have at least one sheet.
        # Delete all other sheets.