From a8f47ce0bd0a8296120f7fe14dd531658e305a97 Mon Sep 17 00:00:00 2001 From: prinz <281208364@qq.com> Date: Wed, 20 Sep 2023 14:29:35 +0800 Subject: [PATCH 1/5] add new xiantianneng-docs --- .../BI-Connection/FineBI-connection.md | 270 ++++++++++++++++++ .../BI-Connection/Superset-connection.md | 217 ++++++++++++++ .../BI-Connection/yonghong-connection.md | 50 ++++ .../Computing-Engine/DataX-write.md | 212 ++++++++++++++ .../Computing-Engine/seatunnel-write.md | 131 +++++++++ .../how-to-use.md | 2 +- docs/MatrixOne/Develop/read-data/cte.md | 2 +- .../backup-restore/modump-backup-restore.md | 6 +- mkdocs.yml | 8 + 9 files changed, 893 insertions(+), 5 deletions(-) create mode 100644 docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/FineBI-connection.md create mode 100644 docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/Superset-connection.md create mode 100644 docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/yonghong-connection.md create mode 100644 docs/MatrixOne/Develop/Ecological-Tools/Computing-Engine/DataX-write.md create mode 100644 docs/MatrixOne/Develop/Ecological-Tools/Computing-Engine/seatunnel-write.md diff --git a/docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/FineBI-connection.md b/docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/FineBI-connection.md new file mode 100644 index 000000000..5104a665d --- /dev/null +++ b/docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/FineBI-connection.md @@ -0,0 +1,270 @@ +# Visualizing MatrixOne Data with FineBI + +## Overview + +FineBI is a next-generation big data analytics tool that empowers business professionals to gain deep insights and leverage their data. In FineBI, users can easily create diverse visualizations, analyze data freely, and explore their datasets. FineBI boasts various data connectivity features and can be used to build complex reports constructing data-driven decision analysis systems. 
It finds wide application in corporate management, production control, financial intelligence, and sales operations. + +MatrixOne supports integration with the data visualization tool FineBI. This article will guide you on connecting to the standalone version of MatrixOne using FineBI and creating various visual data reports, assembling them into dashboards for data analysis and exploration. + +## Before you start + +- Completed the [installation and startup of MatrixOne](../../../Get-Started/install-standalone-matrixone.md). + +- Installed [FineBI](https://help.fanruan.com/finebi/doc-view-260.html?source=5) and performed [FineBI initial setup](https://help.fanruan.com/finebi/doc-view-262.html). + +!!! note + The FineBI version used in the operations shown in this document is FineBI Linux 6.0. You can choose to install the Linux_unix_FineBI6_0-CN.sh package. + +## Connecting to MatrixOne Service via FineBI + +1. After logging into FineBI, select **Management System > Data Connection > Data Connection Management > New Data Connection** as shown below, then choose **MySQL**: + + ![image-20230808174909411](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/select-mysql.png) + +2. Fill in the MatrixOne connection configuration, including the database name, host, port, username, and password. Other parameters can be left at their default settings. You can click the **Test Connection** button to verify if the connection is functional and then click **Save** : + + ![image-20230808182330603](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/testing.png) + +## Creating Visual Reports Using MatrixOne Data + +1. 
Create Demo Data: + + First, log in to the MatrixOne database and execute the following SQL statements to create the necessary data tables and views for the demo: + + ```sql + create database orders; + use orders; + CREATE TABLE `category` (`product_category_name` VARCHAR(255) DEFAULT NULL, + `product_category_name_english` VARCHAR(255) DEFAULT NULL ); + CREATE TABLE `item` (`order_id` VARCHAR(255) NOT NULL, `order_item_id` INT DEFAULT null, + `product_id` VARCHAR(255) DEFAULT null, + `seller_id` VARCHAR(255) DEFAULT null, `shipping_limit_date` DATETIME DEFAULT null, + `price` DECIMAL(10,2) DEFAULT null, + `freight_value` DECIMAL(10,2) DEFAULT null + ); + CREATE TABLE `review` ( + `review_id` VARCHAR(255) NOT NULL, + `order_id` VARCHAR(255) DEFAULT null, + `review_score` TINYINT DEFAULT null, + `review_comment_title` VARCHAR(255) DEFAULT null, + `review_comment_message` TEXT DEFAULT null, + `review_creation_date` DATETIME DEFAULT null, + `review_answer_timestamp` DATETIME DEFAULT null, + PRIMARY KEY (`review_id`) + ); + CREATE TABLE `order_time` ( + `order_id` VARCHAR(255) NOT NULL, + `customer_id` VARCHAR(255) DEFAULT null, + `y` INT DEFAULT null, + `q` INT DEFAULT null, + `m` INT DEFAULT null, + `d` DATE DEFAULT null, + `h` INT DEFAULT null, + `order_purchase_timestamp` DATETIME DEFAULT null + ); + CREATE TABLE `orders` ( + `order_id` VARCHAR(255) NOT NULL, + `customer_id` VARCHAR(255) DEFAULT null, + `order_status` VARCHAR(255) DEFAULT null, + `order_purchase_timestamp` DATETIME DEFAULT null, + `order_approved_at` DATETIME DEFAULT null, + `order_delivered_carrier_date` DATETIME DEFAULT null, + `order_delivered_customer_date` DATETIME DEFAULT null, + `order_estimated_delivery_date` DATETIME DEFAULT null, + PRIMARY KEY (`order_id`) + ); + CREATE TABLE `product` ( + `product_id` VARCHAR(255) NOT NULL, + `product_category_name` VARCHAR(255) DEFAULT null, + `product_name_lenght` INT DEFAULT null, + `product_description_lenght` INT DEFAULT null, + `product_photos_qty` 
INT DEFAULT null, + `product_weight_g` INT DEFAULT null, + `product_length_cm` INT DEFAULT null, + `product_height_cm` INT DEFAULT null, + `product_width_cm` INT DEFAULT null, + PRIMARY KEY (`product_id`) + ); + CREATE TABLE `rfm` ( + `customer_id` VARCHAR(255) DEFAULT null, + `user_type` VARCHAR(255) DEFAULT null, + `shijian` DATE DEFAULT null + ); + + CREATE view total_order_value as select t.order_id,product_id,seller_id,(price*total)+(freight_value*total) as order_value from (select order_id,count(*) as total from item group by order_id) t join item on t.order_id=item.order_id; + + CREATE view order_detail as select a.order_id,product_id,seller_id, customer_id,round(order_value,2) as order_value, y,q,m,d,h,order_purchase_timestamp from total_order_value a inner join order_time b on a.order_id=b.order_id; + ``` + + Next, use the following SQL import statements to import the prepared demo data into the respective tables of the MatrixOne database. + + !!! note + Please note that the path `/root/data/table_name.csv` is the path to the data files for each table. You can generate your data following a similar process. 
+ + ```sql + use orders; + load data local infile '/root/data/category.csv' into table category FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY "\r\n"; + load data local infile '/root/data/review.csv' into table review FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY "\r\n"; + load data local infile '/root/data/product.csv' into table product FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY "\r\n"; + load data local infile '/root/data/item.csv' into table item FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY "\r\n"; + load data local infile '/root/data/order_time.csv' into table order_time FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY "\r\n"; + load data local infile '/root/data/orders.csv' into table orders FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY "\r\n"; + load data local infile '/root/data/rfm.csv' into table rfm FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY "\r\n"; + ``` + +2. Add Data Sets: + + In FineBI, click **Public Data**, then click **New Folder** to create and select a folder. After that, click **New Data Set**, choose **SQL Data Set**, and add the SQL query to the selected folder. 
Enter the dataset name and input the SQL query as shown below: + + ```sql + select d, + count(order_id) as order_num, + count(DISTINCT customer_id) + from orders.order_detail + group by d + order by d + ``` + + You can click the **Preview** button to view the results of the SQL query and then click **OK** to save it: + + ![image-20230809091306270](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/preview.png) + + Below are examples of all the query SQL used in this demo: + + ```sql + -- Daily active users and order count + select d, + count(order_id) as order_num, + count(DISTINCT customer_id) + from orders.order_detail + group by d + order by d + + -- Monthly active users and order count + select count(DISTINCT customer_id), + count(order_id), + concat(y, '-', m) + from orders.order_detail + group by y,m + order by y,m + + -- Active users and order count in different periods + select h, + count(DISTINCT customer_id), + count(order_id) order_num + from orders.order_detail + group by h + order by h + + -- User count by type + SELECT count(*), + user_type + from orders.rfm + GROUP BY user_type + + -- Monthly GMV + select y,m, + sum(order_value), + concat(y, "-", m) month + from orders.order_detail + group by y,m + order by y,m + + -- Quarterly GMV + select y,q, + sum(order_value) gmv, + concat(y, "季度", q) as quator + from orders.order_detail + group by y,q + order by concat(y, "季度", q) asc + + -- Quarterly ARPU + select y,q, + round((sum(order_value)/count(DISTINCT customer_id)),2) arpu, + concat(y, "季度", q) as quator + from orders.order_detail + group by y,q + order by y,q + + -- Monthly ARPU + select y,m, + round((sum(order_value)/count(DISTINCT customer_id)),2) arpu, + concat(y, "-", m) as month + from orders.order_detail + group by y,m + order by y,m + + -- Important retained users' popularity index + SELECT e.product_category_name_english good_type, + SUM(a.order_value) ordder_total_value, + ROUND(AVG(c.review_score), 2) 
good_review_score, + (0.7*SUM(a.order_value)+0.3*10000*ROUND(AVG(c.review_score), 7)) + top_rank_rate + FROM orders.order_detail a + INNER JOIN + (SELECT customer_id + from orders.rfm + WHERE user_type='重要挽留用户' ) as b ON a.customer_id=b.customer_id + LEFT JOIN orders.review c ON a.order_id=c.order_id + LEFT JOIN orders.product d ON a.product_id=d.product_id + LEFT JOIN orders.category e ON d.product_category_name=e.product_category_name + where e.product_category_name_english is not NULL + GROUP BY e.product_category_name_english limit 50 + + -- General retained users' popularity index + SELECT e.product_category_name_english good_type, + SUM(a.order_value) ordder_total_value, + ROUND(AVG(c.review_score), 2) good_review_score, + (0.7*SUM(a.order_value)+0.3*10000*ROUND(AVG(c.review_score), 7)) + top_rank_rate + FROM orders.order_detail a + INNER JOIN + (SELECT customer_id from orders.rfm + WHERE user_type='一般挽留用户' ) as b ON a.customer_id=b.customer_id + LEFT JOIN orders.review c ON a.order_id=c.order_id + LEFT JOIN orders.product d ON a.product_id=d.product_id + LEFT JOIN orders.category e ON d.product_category_name=e.product_category_name + where e.product_category_name_english is not NULL + GROUP BY e.product_category_name_english limit 50 + ``` + +3. Update Data: + + After saving the dataset, you need to click the **Update Data** button and wait for the data update to complete before proceeding with the analysis: + + ![image-20230809091814920](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/update-data.png) + +4. Create Analytic Themes: + + The analytic theme in this example is used to visually present data for general retained users, important retained users, monthly ARPU, quarterly ARPU, active users in different periods, daily active users, monthly active users, and order counts. It assists in decision-making and improving business operations. 
Here are the specific steps to create an analytic theme: + + - Click **My Analysis**, then click **New Folder** to create and select a folder. + - Click **New Analytic Theme**, select the dataset created in the previous step, and then click **OK**. + + ![image-20230809092959252](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/create-analytic.png) + + __Note:__ You can use the **Batch Selection** feature to select multiple datasets for theme analysis. + + ![image-20230809092959252](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/batch-select.png) + + Click the **Add Component** button, choose the chart type, drag the fields from the left to the right as needed, double-click to modify the field visualization name, and change the component name below to describe the content of the report analyzed by the component: + + ![image-20230809092959252](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/add-compon-1.png) + + ![image-20230809092959252](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/add-compon-2.png) + +5. Assemble Dashboards: + + Click **Add Dashboard** to add the components you just created to the dashboard. You can freely drag and resize the components and change the component names below to describe the report's content analyzed by the component. + + ![image-20230810123913230](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/add-dashboard.png) + +6. Publish Dashboards: + + After assembling the dashboard, click **Publish**, set the publication name, publication node, and display platform. Then click **Confirm**, and your dashboard will be successfully published. + + ![image-20230810123913230](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/publish.png) + + Now, see the newly published dashboard under **Navigation** and see how it looks. 
+ + ![image-20230810131752645](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/finebi/published.png) diff --git a/docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/Superset-connection.md b/docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/Superset-connection.md new file mode 100644 index 000000000..58e12c2e4 --- /dev/null +++ b/docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/Superset-connection.md @@ -0,0 +1,217 @@ +# Visual Monitoring of MatrixOne with Superset + +## Overview + +Superset is an open-source, modern, and lightweight BI analysis tool that can connect to multiple data sources, provide rich visualizations, and support custom dashboards, making it easy for users to explore and present data. + +MatrixOne version 1.0 now supports integration with the data visualization tool Superset. This guide will walk you through the quick deployment of MatrixOne and Superset environments. Combining MatrixOne with Superset's visualization capabilities allows you to create a simple monitoring dashboard to track the 'system_metric' data within the MatrixOne database. + +If you wish to extend the functionality further, you can explore other configuration options to monitor various aspects of the entire MatrixOne database. + +## Before you start + +### Recommended Hardware Requirements + +This practice does not require high hardware specifications. A small virtual machine with 2 cores and 4GB of RAM is sufficient for experiencing the functionality of this process. + +- Recommended hardware resources: 8 cores and 32GB of RAM for a virtual machine. + +### Recommended Software Environment + +Before proceeding with this practice, you need to install and configure the following software environment: + +- Docker, with a version of 23.0.1 or higher. +- MatrixOne +- Superset, recommended version 2.1.0. + +You can follow the steps in the following sections to install and configure them step by step. 
+ +#### Installing Docker + +All software environments in this practice are based on Docker installation. You can refer to the [official Docker documentation](https://docs.docker.com/get-docker/) for installing and starting Docker. + +#### Installing MatrixOne + +You can install and deploy MatrixOne based on your operating system environment by following these links: + +- Deploying MatrixOne using Docker in macOS environment: [Installation Guide](../../../Get-Started/install-on-macos/install-on-macos-method3.md) +- Deploying MatrixOne using Docker in Linux environment: [Installation Guide](../../../Get-Started/install-on-linux/install-on-linux-method3.md) + +#### Installing Superset + +Here are the steps for deploying a single-node Superset using Docker: + +1. After installing and starting Docker, use the following command to pull the Superset image from Docker Hub: + + ``` + docker pull amancevice/superset + ``` + +2. Start the Superset image with the following command: + + ``` + docker run -e "SUPERSET_SECRET_KEY=your_secret_key_here" --name superset -u 0 -d -p 8088:8088 amancevice/superset + ``` + + !!! note + You can generate a secure secret key using `openssl rand -base64 42`. Alternatively, you can set it using the `SUPERSET_SECRET_KEY` environment variable. + +3. Initialize the Superset database with the following command: + + ``` + docker exec -it superset superset db upgrade + ``` + +4. Create a Superset admin user by running the following command and providing the requested registration information: + + ``` + docker exec -it superset superset fab create-admin + ``` + +5. Create default roles and permissions using the following command: + + ``` + docker exec -it superset superset init + ``` + +6. Start the Superset service with threads, auto-reloading, and debugging using the following command: + + ``` + docker exec -it superset superset run --with-threads --reload --debugger + ``` + +## Connecting MatrixOne with Superset + +1. 
Access the Superset login page, typically at `http://ip:8088`.
Here are example SQL statements for some queries: + + - CPU Usage: + + ```sql + SELECT metric_name, collecttime, value + FROM metric + WHERE metric_name = 'sys_cpu_combined_percent' OR metric_name = 'sys_cpu_seconds_total' + ORDER BY collecttime DESC; + ``` + + - Storage Usage: + + ```sql + SELECT metric_name, value, collecttime + FROM metric + WHERE metric_name = 'server_storage_usage' + ORDER BY collecttime DESC; + ``` + + - Number of Connections: + + ```sql + SELECT metric_name, value, collecttime + FROM metric + WHERE metric_name = 'server_connections' + ORDER BY collecttime DESC; + ``` + + - Disk Read and Write: + + ```sql + SELECT metric_name, value, collecttime + FROM metric + WHERE metric_name = 'sys_disk_read_bytes' OR metric_name = 'sys_disk_write_bytes' + ORDER BY collecttime DESC; + ``` + + - Network Receive and Send: + + ```sql + SELECT metric_name, value, collecttime + FROM metric + WHERE metric_name = 'sys_net_sent_bytes' OR metric_name = 'sys_net_recv_bytes' + ORDER BY collecttime DESC; + ``` + + - Memory Usage: + + ```sql + SELECT metric_name, value, collecttime + FROM metric + WHERE metric_name = 'sys_memory_available' OR metric_name = 'sys_memory_used' + ORDER BY collecttime DESC; + ``` + + - Transaction Errors: + + ```sql + SELECT metric_name, value, collecttime + FROM metric + WHERE metric_name = 'sql_transaction_errors' OR metric_name = 'sql_transaction_total' + ORDER BY collecttime DESC; + ``` + + - SQL Errors: + + ```sql + SELECT metric_name, value, collecttime + FROM metric + WHERE metric_name = 'sql_statement_errors' OR metric_name = 'sql_statement_total' + ORDER BY collecttime DESC; + ``` + +2. Click **SAVE > Save dataset > SAVE & EXPLORE** to save each of the queries above and use them as data sources for subsequent charts. + +3. Edit the charts: + + Here, we'll use one of the queries as an example to demonstrate how to edit a visual chart. First, select the 'disk_read_write' query as the data source for the chart. 
In the SQL Lab, click **CREATE CHART** below the corresponding query, or if you've saved the query in the previous step, the page will redirect to the Chart editing page: + + ![Create Dashboard](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/superset/superset-create-dashboard.png) + +4. In the chart editing page, choose chart type, time field, metric columns from the query, grouping columns, and other options. Once configured, select **RUN**: + + ![View Dashboard](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/superset/superset-view-dashboard.png) + +5. Click **UPDATE CHART > SAVE** to save the edited chart. + +## Organizing Dashboards + +1. After creating multiple charts, you can assemble them in Superset to create a monitoring dashboard: + + Click on **Dashboards**, then click **+ DASHBOARD** to create a new dashboard or edit an existing one. + + ![image-20230808101636134](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/superset/superset-add-dashboard.png) + +2. In the dashboard editing page, you can drag the charts you've created from the CHARTS list on the right onto the dashboard for assembly. You can also freely adjust the position of charts, add titles, and more. + + ![image-20230808102033250](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/superset/superset-edit-dashboard.png) + +You have successfully connected the MatrixOne database with Superset and created a simple monitoring dashboard to visualize key metrics of the MatrixOne database. 
diff --git a/docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/yonghong-connection.md b/docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/yonghong-connection.md new file mode 100644 index 000000000..ed1cb44d7 --- /dev/null +++ b/docs/MatrixOne/Develop/Ecological-Tools/BI-Connection/yonghong-connection.md @@ -0,0 +1,50 @@ +# Visualizing MatrixOne Reports with Yonghong BI + +## Overview + +Yonghong BI is a comprehensive big data platform that integrates self-service data preparation, exploratory self-service analysis, in-depth analysis, enterprise-level management, and high-performance computing capabilities, providing an all-in-one big data solution. Yonghong BI aims to provide flexible and user-friendly end-to-end big data analysis tools for enterprises of all sizes, enabling users to easily uncover the value of big data and gain profound insights. + +MatrixOne supports connectivity to the intelligent data analysis tool, Yonghong BI. This article will guide you on connecting to the standalone version of MatrixOne through Yonghong BI and creating various visual data reports. + +## Before you start + +- MatrixOne installation and startup are completed. [Install and Start MatrixOne](../../../Get-Started/install-standalone-matrixone.md). +- Yonghong BI is installed. Yonghong BI is a free intelligent data analysis tool based on native installation, eliminating the need for complex deployment steps. + +## Connecting MatrixOne Services with Yonghong BI + +### Adding a Data Source + +Open Yonghong BI, select **Add Data Source > + (New Data Source)** on the left, and choose **MySQL** in the pop-up database options. + +![Add Data Source](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/yonghong/yonghong_add_connect.png) + +After filling in the connection information related to the MatrixOne database, you can select the **Test Connection** button in the upper right corner to ensure a successful connection. 
+ +Once the connection is successful, click **Save** to save the data source information we just filled in. + +![Connect to MatrixOne](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/yonghong/yonghong_connect.png) + +### Creating a Dataset + +In Yonghong BI, select the **Create Dataset** menu on the left, then choose the data source you added just now. You will see tables and views from the MatrixOne database. To meet your business needs, add **Custom SQL**, then click **Refresh Data**. The query results will be displayed on the right. After confirming that the query results meet expectations, click **Save** to save the dataset. + +![Create Dataset](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/yonghong/yonghong_dataset.png) + +### Creating Reports + +First, in Yonghong BI, select the **Create Report** menu on the left, then choose the appropriate **Chart Component** from the right and drag it to the left. + +![Create Report](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/yonghong/yonghong_panel_add.png) + +Select the dataset you just created, set the time dimension as the X-axis, and set the daily order count and active user count as the Y-axis. You can drag the measurement and dimension **fields to their respective positions as needed**. After editing, click **Save** to save the report you created. + +![Create Report](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/yonghong/yonghong_report.png) + +### Viewing Reports + +Finally, in Yonghong BI, select **View Report**, then click on the report name we created in the tree menu on the left. You will be able to view the report we created above. 
+ +![View Report](https://github.com/matrixorigin/artwork/blob/main/docs/develop/bi-connection/yonghong/yonghong_result.png) + +You have successfully connected to the MatrixOne database using Yonghong BI and created a simple report for visualizing MatrixOne data. diff --git a/docs/MatrixOne/Develop/Ecological-Tools/Computing-Engine/DataX-write.md b/docs/MatrixOne/Develop/Ecological-Tools/Computing-Engine/DataX-write.md new file mode 100644 index 000000000..e1369c3c6 --- /dev/null +++ b/docs/MatrixOne/Develop/Ecological-Tools/Computing-Engine/DataX-write.md @@ -0,0 +1,212 @@ +# Writing Data to MatrixOne Using DataX + +## Overview + +This article explains using the DataX tool to write data to offline MatrixOne databases. + +DataX is an open-source heterogeneous data source offline synchronization tool developed by Alibaba. It provides stable and efficient data synchronization functions to achieve efficient data synchronization between various heterogeneous data sources. + +DataX divides the synchronization of different data sources into two main components: **Reader (read data source)** and **Writer (write to the target data source)**. The DataX framework theoretically supports data synchronization work for any data source type. + +MatrixOne is highly compatible with MySQL 8.0. However, since the MySQL Writer plugin with DataX is adapted to the MySQL 5.1 JDBC driver, the community has separately modified the MatrixOneWriter plugin based on the MySQL 8.0 driver to improve compatibility. The MatrixOneWriter plugin implements the functionality of writing data to the target table in the MatrixOne database. In the underlying implementation, MatrixOneWriter connects to the remote MatrixOne database via JDBC and executes the corresponding `insert into ...` SQL statements to write data to MatrixOne. It also supports batch commits for performance optimization. 
MatrixOneWriter retrieves the protocol data generated by the Reader through the DataX framework and generates the corresponding `insert into ...` statements based on your configured `writeMode`.
+- [Install and start MatrixOne](../../../Get-Started/install-standalone-matrixone.md). + +## Steps + +### Create a MatrixOne Table + +Connect to MatrixOne using the MySQL Client and create a test table in MatrixOne: + +```sql +CREATE DATABASE mo_demo; +USE mo_demo; +CREATE TABLE m_user( + M_ID INT NOT NULL, + M_NAME CHAR(25) NOT NULL +); +``` + +### Configure the Data Source + +In this example, we use data generated **in memory** as the data source: + +```json +"reader": { + "name": "streamreader", + "parameter": { + "column" : [ # You can write multiple columns + { + "value": 20210106, # Represents the value of this column + "type": "long" # Represents the type of this column + }, + { + "value": "matrixone", + "type": "string" + } + ], + "sliceRecordCount": 1000 # Indicates how many times to print + } +} +``` + +### Write the Job Configuration File + +Use the following command to view the configuration template: + +```shell +python datax.py -r {YOUR_READER} -w matrixonewriter +``` + +Write the job configuration file `stream2matrixone.json`: + +```json +{ + "job": { + "setting": { + "speed": { + "channel": 1 + } + }, + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column" : [ + { + "value": 20210106, + "type": "long" + }, + { + "value": "matrixone", + "type": "string" + } + ], + "sliceRecordCount": 1000 + } + }, + "writer": { + "name": "matrixonewriter", + "parameter": { + "writeMode": "insert", + "username": "root", + "password": "111", + "column": [ + "M_ID", + "M_NAME" + ], + "preSql": [ + "delete from m_user" + ], + "connection": [ + { + "jdbcUrl": "jdbc:mysql://127.0.0.1:6001/mo_demo", + "table": [ + "m_user" + ] + } + ] + } + } + } + ] + } +} +``` + +### Start DataX + +Execute the following command to start DataX: + +```shell +$ cd {YOUR_DATAX_DIR_BIN} +$ python datax.py stream2matrixone.json +``` + +### View the Results + +Connect to MatrixOne using the MySQL Client and use `select` to query the inserted results. 
The 1000 records in memory have been successfully written to MatrixOne.

```sql
mysql> select * from m_user limit 5;
+----------+-----------+
| m_id     | m_name    |
+----------+-----------+
| 20210106 | matrixone |
| 20210106 | matrixone |
| 20210106 | matrixone |
| 20210106 | matrixone |
| 20210106 | matrixone |
+----------+-----------+
5 rows in set (0.01 sec)

mysql> select count(*) from m_user limit 5;
+----------+
| count(*) |
+----------+
|     1000 |
+----------+
1 row in set (0.00 sec)
```

## Parameter Descriptions

Here are some commonly used parameters for MatrixOneWriter:

| Parameter Name | Parameter Description | Mandatory | Default Value |
| --- | --- | --- | --- |
| **jdbcUrl** | JDBC connection information for the target database. DataX will append some attributes to the provided `jdbcUrl` during runtime, such as `yearIsDateType=false&zeroDateTimeBehavior=CONVERT_TO_NULL&rewriteBatchedStatements=true&tinyInt1isBit=false&serverTimezone=Asia/Shanghai`. | Yes | None |
| **username** | Username for the target database. | Yes | None |
| **password** | Password for the target database. | Yes | None |
| **table** | Name of the target table. Supports writing to one or more tables. If configuring multiple tables, make sure their structures are consistent. | Yes | None |
| **column** | Fields in the target table that must be written with data, separated by commas. For example: `"column": ["id","name","age"]`. To write all columns, you can use `*`, for example: `"column": ["*"]`. | Yes | None |
| **preSql** | Standard SQL statements to be executed before writing data to the target table. | No | None |
| **postSql** | Standard SQL statements to be executed after writing data to the target table. | No | None |
| **writeMode** | Controls the SQL statements used when writing data to the target table. You can choose `insert` or `update`. | No | `insert` |
| **batchSize** | Size of records for batch submission. 
This can significantly reduce network interactions between DataX and MatrixOne, improving overall throughput. However, setting it too large may cause DataX to run out of memory. | No | 1024 |

## Type Conversion

MatrixOneWriter supports most MatrixOne data types, but a few types are not yet supported, so you need to pay special attention to your data types.

Here is a list of type conversions that MatrixOneWriter performs for MatrixOne data types:

| DataX Internal Type | MatrixOne Data Type |
| ------------------- | ------------------- |
| Long | int, tinyint, smallint, bigint |
| Double | float, double, decimal |
| String | varchar, char, text |
| Date | date, datetime, timestamp, time |
| Boolean | bool |
| Bytes | blob |

## Additional References

- MatrixOne is compatible with the MySQL protocol. MatrixOneWriter is a modified version of the MySQL Writer with adjustments for JDBC driver versions. You can still use the MySQL Writer to write to MatrixOne.

- To add the MatrixOne Writer in DataX, you need to download [matrixonewriter.zip](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Computing-Engine/datax-write/matrixonewriter.zip) and unzip it into the `plugin/writer/` directory in the root directory of your DataX project.

## Questions and Answers

**Q: During runtime, I encountered the error "Configuration information error, the configuration file you provided /{YOUR_MATRIXONE_WRITER_PATH}/plugin.json does not exist." What should I do?**

A: DataX attempts to find the plugin.json file by searching for similar folders when it starts. If the matrixonewriter.zip file also exists in the same directory, DataX will try to find it in `.../datax/plugin/writer/matrixonewriter.zip/plugin.json`. In the macOS environment, DataX will also attempt to find it in `.../datax/plugin/writer/.DS_Store/plugin.json`. In this case, you need to delete these extra files or folders. 
diff --git a/docs/MatrixOne/Develop/Ecological-Tools/Computing-Engine/seatunnel-write.md b/docs/MatrixOne/Develop/Ecological-Tools/Computing-Engine/seatunnel-write.md new file mode 100644 index 000000000..e04fb2e42 --- /dev/null +++ b/docs/MatrixOne/Develop/Ecological-Tools/Computing-Engine/seatunnel-write.md @@ -0,0 +1,131 @@ +# Writing Data to MatrixOne Using SeaTunnel + +## Overview + +[SeaTunnel](https://seatunnel.apache.org/) is a distributed, high-performance, and highly scalable data integration platform that focuses on synchronizing and transforming massive data, including offline and real-time data. MatrixOne supports using SeaTunnel to synchronize data from other databases and can efficiently handle hundreds of billions of records. + +This document will explain how to use SeaTunnel to write data to MatrixOne. + +## Before you start + +Before using SeaTunnel to write data to MatrixOne, make sure to complete the following preparations: + +- Install and start MatrixOne by following the steps in [Install and Start MatrixOne](../../../Get-Started/install-standalone-matrixone.md). + +- Install SeaTunnel Version 2.3.3 by downloading it from [here](https://www.apache.org/dyn/closer.lua/seatunnel/2.3.3/apache-seatunnel-2.3.3-bin.tar.gz). After installation, you can define the installation path of SeaTunnel using a shell command: + +```shell +export SEATNUNNEL_HOME="/root/seatunnel" +``` + +## Steps + +### Create Test Data + +1. Create a MySQL database named `test1` and create a table named `test_table` within it. Store this in a file named `mysql.sql` under the root directory. Here's the MySQL DDL statement: + + ```sql + create database test1; + use test1; + CREATE TABLE `test_table` ( + `name` varchar(255) DEFAULT NULL, + `age` int(11) DEFAULT NULL + ) ENGINE=InnoDB DEFAULT CHARSET=utf8; + ``` + +2. Use the [mo_ctl](https://docs.matrixorigin.cn/1.0.0-rc1/MatrixOne/Maintain/mo_ctl/) tool to import the MySQL DDL statements into MatrixOne directly. 
Execute the following command:

    ```shell
    mo_ctl sql /root/mysql.sql
    ```

### Install the Connectors Plugin

This document will explain how to use SeaTunnel's `connector-jdbc` connection plugin to connect to MatrixOne.

1. In the `${SEATNUNNEL_HOME}/config/plugin_config` file of SeaTunnel, add the following content:

    ```shell
    --connectors-v2--
    connector-jdbc
    --end--
    ```

2. SeaTunnel binary package version 2.3.3 does not provide connector dependencies by default. You need to install the connectors when using SeaTunnel for the first time by running the following command:

    ```shell
    sh bin/install-plugin.sh 2.3.3
    ```

    __Note:__ This document uses the SeaTunnel engine to write data to MatrixOne without relying on Flink or Spark.

### Define the Task Configuration File

In this document, we use the `test_table` table in the MySQL database as the data source, and we write data directly to the `test_table` table in the MatrixOne database without data processing.

Due to data compatibility issues, you need to configure the task configuration file `${SEATNUNNEL_HOME}/config/v2.batch.config.template`, which defines how SeaTunnel handles data input, processing, and output logic after it starts. 
+ +Edit the configuration file with the following content: + +```shell +env { + execution.parallelism = 2 + job.mode = "BATCH" +} + +source { + Jdbc { + url = "jdbc:mysql://192.168.110.40:3306/test" + driver = "com.mysql.cj.jdbc.Driver" + connection_check_timeout_sec = 100 + user = "root" + password = "123456" + query = "select * from test_table" + } +} + +transform { + +} + +sink { + jdbc { + url = "jdbc:mysql://192.168.110.248:6001/test" + driver = "com.mysql.cj.jdbc.Driver" + user = "root" + password = "111" + query = "insert into test_table(name,age) values(?,?)" + } +} +``` + +### Install Database Dependencies + +Download [mysql-connector-java-8.0.33.jar](https://downloads.mysql.com/archives/get/p/3/file/mysql-connector-j-8.0.33.zip) and copy the file to the `${SEATNUNNEL_HOME}/plugins/jdbc/lib/` directory. + +### Run the SeaTunnel Application + +Execute the following command to start the SeaTunnel application: + +```shell +./bin/seatunnel.sh --config ./config/v2.batch.config.template -e local +``` + +### View the Results + +After SeaTunnel finishes running, it will display statistics similar to the following, summarizing the time taken for this write operation, the total number of data read, the total number of writes, and the total number of write failures: + +```shell +*********************************************** + Job Statistic Information +*********************************************** +Start Time : 2023-08-07 16:45:02 +End Time : 2023-08-07 16:45:05 +Total Time(s) : 3 +Total Read Count : 5000000 +Total Write Count : 5000000 +Total Failed Count : 0 +*********************************************** +``` + +You have successfully synchronized data from a MySQL database into the MatrixOne database. 
diff --git a/docs/MatrixOne/Develop/Transactions/matrixone-transaction-overview/how-to-use.md b/docs/MatrixOne/Develop/Transactions/matrixone-transaction-overview/how-to-use.md index 279344453..a6f2fcc66 100644 --- a/docs/MatrixOne/Develop/Transactions/matrixone-transaction-overview/how-to-use.md +++ b/docs/MatrixOne/Develop/Transactions/matrixone-transaction-overview/how-to-use.md @@ -81,4 +81,4 @@ __Note:__ If you only add the transaction mode parameter `mode = "optimistic"`, Restart MatrixOne to make the switched transaction mode take effect. -For more information on the configuration parameters, see [Distributed Common Parameters Configuration](../../Reference/System-Parameters/distributed-configuration-settings.md). +For more information on the configuration parameters, see [Distributed Common Parameters Configuration](../../../Reference/System-Parameters/distributed-configuration-settings.md). diff --git a/docs/MatrixOne/Develop/read-data/cte.md b/docs/MatrixOne/Develop/read-data/cte.md index 35a6a4d26..a67b64197 100644 --- a/docs/MatrixOne/Develop/read-data/cte.md +++ b/docs/MatrixOne/Develop/read-data/cte.md @@ -120,4 +120,4 @@ The non-recursive CTE example selects basic information of all employees from th The `RECURSIVE` keyword is needed to declare a recursive CTE. -For more information on using CTEs, see [WITH (Common Table Expressions)](.. //Reference/SQL-Reference/Data-Query-Language/with-cte.md). +For more information on using CTEs, see [WITH (Common Table Expressions)](../../Reference/SQL-Reference/Data-Query-Language/with-cte.md). 
diff --git a/docs/MatrixOne/Maintain/backup-restore/modump-backup-restore.md b/docs/MatrixOne/Maintain/backup-restore/modump-backup-restore.md index bb8e7a4a5..be2daa407 100644 --- a/docs/MatrixOne/Maintain/backup-restore/modump-backup-restore.md +++ b/docs/MatrixOne/Maintain/backup-restore/modump-backup-restore.md @@ -2,15 +2,15 @@ It is essential to back up your databases to recover your data and be up and running again in case problems occur, such as system crashes, hardware failures, or users deleting data by mistake. Backups are also essential as a safeguard before upgrading a MatrixOne installation, and they can be used to transfer a MatrixOne building to another system. -MatrixOne currently only supports logical backup through the `modump` utility. `modump` is a command-line utility used to generate the logical backup of the MatrixOne database. It produces SQL Statements that can be used to recreate the database objects and data. You can look up the syntax and usage guide in the [modump](../Develop/export-data/modump.md) chapter. +MatrixOne currently only supports logical backup through the `modump` utility. `modump` is a command-line utility used to generate the logical backup of the MatrixOne database. It produces SQL Statements that can be used to recreate the database objects and data. You can look up the syntax and usage guide in the [modump](../../Develop/export-data/modump.md) chapter. We will take a simple example to walk you through the backup and restore process with the `modump` utility. ## Steps -### 1. [Build the modump binary](../Develop/export-data/modump.md) +### 1. [Build the modump binary](../../Develop/export-data/modump.md) -For more information on how to build the `modump` binary, see [Build the modump binary](../Develop/export-data/modump.md). +For more information on how to build the `modump` binary, see [Build the modump binary](../../Develop/export-data/modump.md). 
If the `modump` binary has been built, you can continue to browse the next chapter **Generate the backup of a single database**. diff --git a/mkdocs.yml b/mkdocs.yml index 7a76a96e8..2052c9b87 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -145,6 +145,14 @@ nav: - SQLAlchemy CRUD demo: MatrixOne/Tutorial/sqlalchemy-python-crud-demo.md - Golang CRUD demo: MatrixOne/Tutorial/develop-golang-crud-demo.md - Gorm CRUD demo: MatrixOne/Tutorial/gorm-golang-crud-demo.md + - Ecological Tools: + - BI Tools: + - Visualizing MatrixOne Data with FineBI: MatrixOne/Develop/Ecological-Tools/BI-Connection/FineBI-connection.md + - Visualizing MatrixOne Reports with Yonghong BI: MatrixOne/Develop/Ecological-Tools/BI-Connection/yonghong-connection.md + - Visual Monitoring of MatrixOne with Superset: MatrixOne/Develop/Ecological-Tools/BI-Connection/Superset-connection.md + - ETL Tools: + - Writing Data to MatrixOne Using SeaTunnel: MatrixOne/Develop/Ecological-Tools/Computing-Engine/seatunnel-write.md + - Writing Data to MatrixOne Using DataX: MatrixOne/Develop/Ecological-Tools/Computing-Engine/DataX-write.md - Deploying: - Plan MatrixOne Cluster Topology: - Cluster Topology Planning Overview: MatrixOne/Deploy/deployment-topology/topology-overview.md From 200ffa27367dd57b123599f4da687db59dfd3cde Mon Sep 17 00:00:00 2001 From: prinz <43231571+lacrimosaprinz@users.noreply.github.com> Date: Thu, 21 Sep 2023 15:27:27 +0800 Subject: [PATCH 2/5] Add files via upload --- .../Ecological-Tools/dolphinScheduler.md | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md diff --git a/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md b/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md new file mode 100644 index 000000000..7229186c5 --- /dev/null +++ b/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md @@ -0,0 +1,126 @@ +# 使用 DolphinScheduler 连接 MatrixOne + +## 概述 + +Apache DolphinScheduler 
是一个分布式、易扩展的可视化 DAG 工作流任务调度开源系统。它提供了一种解决方案,可以通过可视化操作任务、工作流和全生命周期的数据处理过程。 + +Apache DolphinScheduler 的主要目标是解决复杂的大数据任务依赖关系。它使用 DAG(Directed Acyclic Graph,有向无环图)的流式方式来组装任务,允许您实时监控任务的执行状态,支持任务重试、指定节点恢复失败、暂停、恢复、终止等操作。 + +MatrixOne 支持与可视化 DAG 工作流任务调度系统 DolphinScheduler 进行连接。本文将指导您如何通过 DolphinScheduler 连接到 MatrixOne 并创建任务工作流。 + +## 开始前准备 + +- 已完成[安装和启动 MatrixOne](../../../Get-Started/install-standalone-matrixone.md)。 + +- 已完成[安装 DolphinScheduler](https://dolphinscheduler.apache.org/zh-cn/docs/3.1.8/guide/installation/standalone)。 + +## 操作步骤 + +### 第一步:配置 MySQL 驱动 + +1. 下载 MySQL 驱动并将其复制到 libs 目录: + + 在安装完成后,您需要手动下载 [mysql-connector-java 驱动](https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.16/mysql-connector-java-8.0.16.jar)(版本 8.0.16),然后将它分别复制到 DolphinScheduler 安装目录下的四个目录中:`api-server/libs`、`alert-server/libs`、`master-server/libs` 和 `worker-server/libs`。 + + !!! 注意 + 推荐使用 `mysql-connector-java-8.0.16.jar` 作为 MySQL 驱动包。 + +2. 重启 DolphinScheduler: + + 复制驱动包完成后,需要重启 DolphinScheduler 服务。首先进入 DolphinScheduler 的安装目录,然后执行以下命令来重启 DolphinScheduler 服务: + + ```shell + # 停止 Standalone Server 服务 + bash ./bin/dolphinscheduler-daemon.sh stop standalone-server + # 启动 Standalone Server 服务 + bash ./bin/dolphinscheduler-daemon.sh start standalone-server + ``` + +3. 登录 DolphinScheduler: + + 使用默认用户名 `admin` 和密码 `dolphinscheduler123`,通过访问 登录 DolphinScheduler 的 Web 用户界面,如下图所示: + + ![image-20230809145317885](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809145317885.png) + +4. 创建数据源: + + 点击**数据源中心 > 创建数据源**,填写 MatrixOne 数据连接信息。完成后,点击**测试连接**,如果连接成功,点击**确定**保存: + + ![image-20230809145935857](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809145935857.png) + +### 第二步:创建项目工作流 + +1. 
创建租户: + + 在**安全中心**中,点击**创建租户**,填写租户名称,如下图所示: + + ![image-20230809160632965](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809160632965.png) + + !!! 注意 + 在生产环境中,不建议使用 root 作为租户。 + +2. 创建项目: + + 在**项目管理**中,点击**创建项目**,填写项目名称,如下图所示: + + ![image-20230809150528364](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809150528364.png) + +3. 创建工作流并添加节点: + + 点击上一步创建的**项目名称**,然后点击**创建工作流**。从左侧拖动 **SQL** 节点到右侧的画布上,填写**节点名称**、**数据源信息**、**SQL 类型**、**SQL 语句**,然后点击**确定**。如下图所示: + + 此步骤创建的是一个建表节点,SQL 语句用于创建表格。 + + ![image-20230809151554568](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809151554568.png) + + 接下来,类似地创建**插入数据**和**查询数据**节点。这三个节点的依赖关系如下图,您可以手动连接它们: + + ![image-20230809153149428](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809153149428.png) + + 三个节点的 SQL 语句如下: + + ```sql + #create_table + + CREATE TABLE IF NOT EXISTS test_table (id INT AUTO_INCREMENT PRIMARY KEY, name + + VARCHAR(255) NOT NULL) + + #insert_data + + INSERT INTO test_table (name) VALUES ('John Doe') + + #select_data + + SELECT * FROM test_table + ``` + + 根据依赖关系连接这三个节点,然后点击**保存**。填写**工作流名称**,选择之前创建的**租户**,选择执行策略为**并行**,然后点击**确定**。 + + ![image-20230809161503945](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809161503945.png) + + 创建好工作流后,您可以在**工作流关系**页面看到创建的工作流,其状态为**工作流下线**: + + ![image-20230809161909925](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809161909925.png) + + 同样,您也可以在**工作流定义**页面看到定义的工作流,其状态为**下线**: + + ![image-20230809162411368](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809162411368.png) + +4. 
上线并运行工作流: + + 工作流必须先上线才能运行。点击**上线**按钮,将之前创建的工作流上线: + + ![image-20230809162245088](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809162245088.png) + + 上线后,工作流的状态如下图所示: + + ![image-20230809163722777](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809163722777.png) + + 接下来,点击**运行**按钮,设置启动前的配置参数,然后点击**确定**: + + ![image-20230809162828049](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809162828049.png) + + 最后,返回**项目概况**,查看工作流以及下面的三个任务是否成功运行,如下图所示: + + ![image-20230809163533339](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809163533339.png) From 0a39e1a230f7ae549c1a80095f3187c7fd675261 Mon Sep 17 00:00:00 2001 From: prinz <43231571+lacrimosaprinz@users.noreply.github.com> Date: Thu, 21 Sep 2023 15:34:53 +0800 Subject: [PATCH 3/5] Update dolphinScheduler.md --- .../Ecological-Tools/dolphinScheduler.md | 114 +++++++++--------- 1 file changed, 56 insertions(+), 58 deletions(-) diff --git a/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md b/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md index 7229186c5..ed85f04fe 100644 --- a/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md +++ b/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md @@ -1,90 +1,88 @@ -# 使用 DolphinScheduler 连接 MatrixOne +# Connecting MatrixOne with DolphinScheduler -## 概述 +## Overview -Apache DolphinScheduler 是一个分布式、易扩展的可视化 DAG 工作流任务调度开源系统。它提供了一种解决方案,可以通过可视化操作任务、工作流和全生命周期的数据处理过程。 +Apache DolphinScheduler is a distributed, highly scalable open-source system for visual DAG (Directed Acyclic Graph) workflow task scheduling. It provides a solution for visually orchestrating tasks, workflows, and the entire data processing lifecycle. 
-Apache DolphinScheduler 的主要目标是解决复杂的大数据任务依赖关系。它使用 DAG(Directed Acyclic Graph,有向无环图)的流式方式来组装任务,允许您实时监控任务的执行状态,支持任务重试、指定节点恢复失败、暂停、恢复、终止等操作。 +The main goal of Apache DolphinScheduler is to address complex dependencies in large-scale data tasks. It assembles tasks streamingly using DAGs, allowing real-time monitoring of task execution status and supporting operations such as task retries, specifying node recovery for failures, and pausing, resuming, and terminating tasks. -MatrixOne 支持与可视化 DAG 工作流任务调度系统 DolphinScheduler 进行连接。本文将指导您如何通过 DolphinScheduler 连接到 MatrixOne 并创建任务工作流。 +MatrixOne supports integration with DolphinScheduler, a visual DAG workflow task scheduling system. This document will guide you on connecting MatrixOne to DolphinScheduler and creating task workflows. -## 开始前准备 +## Before you start -- 已完成[安装和启动 MatrixOne](../../../Get-Started/install-standalone-matrixone.md)。 +- Completed [MatrixOne installation and setup](../../../Get-Started/install-standalone-matrixone.md). -- 已完成[安装 DolphinScheduler](https://dolphinscheduler.apache.org/zh-cn/docs/3.1.8/guide/installation/standalone)。 +- Installed [DolphinScheduler installation](https://dolphinscheduler.apache.org/docs/3.1.8/en/installation/standalone). -## 操作步骤 +## Operating Steps -### 第一步:配置 MySQL 驱动 +### Step 1: Configure the MySQL Driver -1. 下载 MySQL 驱动并将其复制到 libs 目录: +1. Download the MySQL driver and copy it to the libs directory: - 在安装完成后,您需要手动下载 [mysql-connector-java 驱动](https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.16/mysql-connector-java-8.0.16.jar)(版本 8.0.16),然后将它分别复制到 DolphinScheduler 安装目录下的四个目录中:`api-server/libs`、`alert-server/libs`、`master-server/libs` 和 `worker-server/libs`。 + After installation, you need to manually download the [mysql-connector-java driver](https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.16/mysql-connector-java-8.0.16.jar) (version 8.0.16). 
Then, copy it to four directories in the DolphinScheduler installation directory: `api-server/libs`, `alert-server/libs`, `master-server/libs`, and `worker-server/libs`. - !!! 注意 - 推荐使用 `mysql-connector-java-8.0.16.jar` 作为 MySQL 驱动包。 + !!! Note + It is recommended to use `mysql-connector-java-8.0.16.jar` as the MySQL driver package. -2. 重启 DolphinScheduler: +2. Restart DolphinScheduler: - 复制驱动包完成后,需要重启 DolphinScheduler 服务。首先进入 DolphinScheduler 的安装目录,然后执行以下命令来重启 DolphinScheduler 服务: + After copying the driver package, you need to restart the DolphinScheduler service. First, go to the DolphinScheduler installation directory and then execute the following command to restart the DolphinScheduler service: ```shell - # 停止 Standalone Server 服务 + # Stop the Standalone Server service bash ./bin/dolphinscheduler-daemon.sh stop standalone-server - # 启动 Standalone Server 服务 + # Start the Standalone Server service bash ./bin/dolphinscheduler-daemon.sh start standalone-server ``` -3. 登录 DolphinScheduler: +3. Log in to DolphinScheduler: - 使用默认用户名 `admin` 和密码 `dolphinscheduler123`,通过访问 登录 DolphinScheduler 的 Web 用户界面,如下图所示: + Use the default username `admin` and password `dolphinscheduler123`. Access the DolphinScheduler web user interface by visiting http://ip:12345/dolphinscheduler/ui, as shown below: - ![image-20230809145317885](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809145317885.png) + ![image-20230809145317885](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809145317885.png) -4. 创建数据源: +4. Create a Data Source: - 点击**数据源中心 > 创建数据源**,填写 MatrixOne 数据连接信息。完成后,点击**测试连接**,如果连接成功,点击**确定**保存: + Click on **Data Source Center > Create Data Source** and enter the MatrixOne data connection information. 
Afterward, click on **Test Connection**; if the connection is successful, click **OK** to save it: - ![image-20230809145935857](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809145935857.png) + ![image-20230809145935857](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809145935857.png) -### 第二步:创建项目工作流 +### Step 2: Create a Project Workflow -1. 创建租户: +1. Create a Tenant: - 在**安全中心**中,点击**创建租户**,填写租户名称,如下图所示: + In the **Security Center**, click on **Create Tenant** and enter the tenant name, as shown below: - ![image-20230809160632965](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809160632965.png) + ![image-20230809160632965](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809160632965.png) - !!! 注意 - 在生产环境中,不建议使用 root 作为租户。 + !!! Note + In a production environment, it is not recommended to use `root` as the tenant. -2. 创建项目: +2. Create a Project: - 在**项目管理**中,点击**创建项目**,填写项目名称,如下图所示: + In **Project Management**, click on **Create Project** and enter the project name, as shown below: - ![image-20230809150528364](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809150528364.png) + ![image-20230809150528364](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809150528364.png) -3. 创建工作流并添加节点: +3. Create a Workflow and Add Nodes: - 点击上一步创建的**项目名称**,然后点击**创建工作流**。从左侧拖动 **SQL** 节点到右侧的画布上,填写**节点名称**、**数据源信息**、**SQL 类型**、**SQL 语句**,然后点击**确定**。如下图所示: + Click on the **Project Name** created in the previous step and then click on **Create Workflow**. Drag the **SQL** node from the left to the canvas on the right. Fill in the **Node Name**, **Data Source Information**, **SQL Type**, and **SQL Statement**, then click **OK**. 
As shown below: - 此步骤创建的是一个建表节点,SQL 语句用于创建表格。 + The node created in this step is for creating a table, and the SQL statement is used to create a table. - ![image-20230809151554568](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809151554568.png) + ![image-20230809151554568](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809151554568.png) - 接下来,类似地创建**插入数据**和**查询数据**节点。这三个节点的依赖关系如下图,您可以手动连接它们: + Next, create **Insert Data** and **Query Data** nodes in a similar way. The dependency relationship between these three nodes is shown below, and you can manually connect them: - ![image-20230809153149428](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809153149428.png) + ![image-20230809153149428](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809153149428.png) - 三个节点的 SQL 语句如下: + The SQL statements for these three nodes are as follows: ```sql #create_table - CREATE TABLE IF NOT EXISTS test_table (id INT AUTO_INCREMENT PRIMARY KEY, name - - VARCHAR(255) NOT NULL) + CREATE TABLE IF NOT EXISTS test_table (id INT AUTO_INCREMENT PRIMARY KEY, name VARCHAR(255) NOT NULL) #insert_data @@ -95,32 +93,32 @@ MatrixOne 支持与可视化 DAG 工作流任务调度系统 DolphinScheduler SELECT * FROM test_table ``` - 根据依赖关系连接这三个节点,然后点击**保存**。填写**工作流名称**,选择之前创建的**租户**,选择执行策略为**并行**,然后点击**确定**。 + Connect these three nodes based on their dependency relationship, then click **Save**. Enter the **Workflow Name**, select the previously created **Tenant**, choose **Parallel** as the execution policy, and click **OK**. 
- ![image-20230809161503945](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809161503945.png) + ![image-20230809161503945](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809161503945.png) - 创建好工作流后,您可以在**工作流关系**页面看到创建的工作流,其状态为**工作流下线**: + Once the workflow is created, you can see it in the **Workflow Relations** page with the status "Workflow Offline": - ![image-20230809161909925](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809161909925.png) + ![image-20230809161909925](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809161909925.png) - 同样,您也可以在**工作流定义**页面看到定义的工作流,其状态为**下线**: + Similarly, you can also see the defined workflow in the **Workflow Definitions** page with the status "Offline": - ![image-20230809162411368](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809162411368.png) + ![image-20230809162411368](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809162411368.png) -4. 上线并运行工作流: +4. Publish and Run the Workflow: - 工作流必须先上线才能运行。点击**上线**按钮,将之前创建的工作流上线: + A workflow must be published before it can be run. 
Click the **Publish** button to publish the workflow created earlier: - ![image-20230809162245088](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809162245088.png) + ![image-20230809162245088](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809162245088.png) - 上线后,工作流的状态如下图所示: + After publishing, the workflow status will appear as follows: - ![image-20230809163722777](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809163722777.png) + ![image-20230809163722777](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809163722777.png) - 接下来,点击**运行**按钮,设置启动前的配置参数,然后点击**确定**: + Next, click the **Run** button, set the configuration parameters before starting, and then click **OK**: - ![image-20230809162828049](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809162828049.png) + ![image-20230809162828049](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809162828049.png) - 最后,返回**项目概况**,查看工作流以及下面的三个任务是否成功运行,如下图所示: + Finally, return to the **Project Overview** to check whether the workflow and the three tasks below it have run successfully, as shown below: - ![image-20230809163533339](https://community-shared-data-1308875761.cos.ap-beijing.myqcloud.com/artwork/docs/develop/Scheduling-tool/image-20230809163533339.png) + ![image-20230809163533339](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809163533339.png) From 22470e0b4f6e5c8e4878b1ccd5d568df3a77c396 Mon Sep 17 00:00:00 2001 From: prinz <43231571+lacrimosaprinz@users.noreply.github.com> Date: Thu, 21 Sep 2023 15:36:30 +0800 Subject: [PATCH 4/5] Update mkdocs.yml --- mkdocs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mkdocs.yml b/mkdocs.yml index 
2052c9b87..7aa07d762 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -153,6 +153,8 @@ nav: - ETL Tools: - Writing Data to MatrixOne Using SeaTunnel: MatrixOne/Develop/Ecological-Tools/Computing-Engine/seatunnel-write.md - Writing Data to MatrixOne Using DataX: MatrixOne/Develop/Ecological-Tools/Computing-Engine/DataX-write.md + - Scheduling Tools: + - Connecting MatrixOne with DolphinScheduler: MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md - Deploying: - Plan MatrixOne Cluster Topology: - Cluster Topology Planning Overview: MatrixOne/Deploy/deployment-topology/topology-overview.md From 0dd167853bc6488da9f643c295b225bb4f8f1981 Mon Sep 17 00:00:00 2001 From: prinz <43231571+lacrimosaprinz@users.noreply.github.com> Date: Thu, 21 Sep 2023 15:39:55 +0800 Subject: [PATCH 5/5] Update dolphinScheduler.md --- docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md b/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md index ed85f04fe..672c36eb1 100644 --- a/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md +++ b/docs/MatrixOne/Develop/Ecological-Tools/dolphinScheduler.md @@ -38,7 +38,7 @@ MatrixOne supports integration with DolphinScheduler, a visual DAG workflow task 3. Log in to DolphinScheduler: - Use the default username `admin` and password `dolphinscheduler123`. Access the DolphinScheduler web user interface by visiting http://ip:12345/dolphinscheduler/ui, as shown below: + Use the default username `admin` and password `dolphinscheduler123`. Access the DolphinScheduler web user interface by visiting `http://ip:12345/dolphinscheduler/ui`, as shown below: ![image-20230809145317885](https://github.com/matrixorigin/artwork/blob/main/docs/develop/Scheduling-tool/image-20230809145317885.png)