diff --git a/README b/README
new file mode 100755
index 0000000000..e69de29bb2
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..0b4cbbe7ff
--- /dev/null
+++ b/README.md
@@ -0,0 +1,112 @@
+![Datax-logo](https://github.com/alibaba/DataX/blob/master/images/DataX-logo.jpg)
+
+
+
+# DataX
+
+DataX is an offline data synchronization tool/platform widely used within Alibaba Group. It implements efficient data synchronization between heterogeneous data sources, including MySQL, Oracle, SqlServer, PostgreSQL, HDFS, Hive, ADS, HBase, TableStore (OTS), MaxCompute (ODPS), and DRDS.
+
+
+
+# Features
+
+As a data synchronization framework, DataX abstracts synchronization between different data sources into Reader plugins, which read data from a source, and Writer plugins, which write data to a target, so in principle the framework can synchronize data between arbitrary source types. The plugin system also forms an ecosystem: each newly added data source immediately becomes interoperable with every existing one. A schematic Writer plugin sketch follows.
+
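+As a minimal sketch of that abstraction (only the SPI classes that already appear in this repository are assumed), a Writer plugin is a Job/Task pair:
+
+```java
+import java.util.List;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+
+public class MyWriter extends Writer {
+    public static class Job extends Writer.Job {
+        @Override
+        public void init() { /* read the plugin config via getPluginJobConf() */ }
+        @Override
+        public List<Configuration> split(int mandatoryNumber) {
+            return null; // return one Configuration per Task
+        }
+        @Override
+        public void destroy() { }
+    }
+
+    public static class Task extends Writer.Task {
+        @Override
+        public void init() { }
+        public void startWrite(RecordReceiver receiver) { /* drain records and write them out */ }
+        @Override
+        public void destroy() { }
+    }
+}
+```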
+
+
+# Detailed Introduction
+
+##### Please refer to: [DataX-Introduction](https://github.com/alibaba/DataX/wiki/DataX-Introduction)
+
+
+
+# Quick Start
+
+##### Download: [DataX package](http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz)
+
+##### Please see: [Quick Start](https://github.com/alibaba/DataX/wiki/Quick-Start)
+* [Configuration example: reading from MySQL and writing to ODPS](https://github.com/alibaba/DataX/wiki/Quick-Start)
+* [Configuring scheduled jobs (Linux)](https://github.com/alibaba/DataX/wiki/%E9%85%8D%E7%BD%AE%E5%AE%9A%E6%97%B6%E4%BB%BB%E5%8A%A1%EF%BC%88Linux%E7%8E%AF%E5%A2%83%EF%BC%89)
+* [Passing parameters dynamically](https://github.com/alibaba/DataX/wiki/%E5%8A%A8%E6%80%81%E4%BC%A0%E5%85%A5%E5%8F%82%E6%95%B0)
+
+
+
+# Supported Data Channels
+
+DataX now has a fairly complete plugin ecosystem covering the mainstream RDBMS databases, NoSQL stores, and big-data computing systems. The currently supported data sources are listed below; for details, see the [DataX data source reference guide](https://github.com/alibaba/DataX/wiki/DataX-all-data-channels).
+
+| Type | Data Source | Reader | Writer |
+| ------------ | ---------- | :-------: | :-------: |
+| RDBMS | MySQL | √ | √ |
+| | Oracle | √ | √ |
+| | SqlServer | √ | √ |
+| | PostgreSQL | √ | √ |
+| | DRDS | √ | √ |
+| | DM (达梦) | √ | √ |
+| Alibaba Cloud data warehouses | ODPS | √ | √ |
+| | ADS | | √ |
+| | OSS | √ | √ |
+| | OCS | √ | √ |
+| NoSQL | OTS | √ | √ |
+| | Hbase0.94 | √ | √ |
+| | Hbase1.1 | √ | √ |
+| | MongoDB | √ | √ |
+| Unstructured storage | TxtFile | √ | √ |
+| | FTP | √ | √ |
+| | HDFS | √ | √ |
+
+# Developing a New Plugin
+Please see: [DataX Plugin Development Guide](https://github.com/alibaba/DataX/wiki/DataX%E6%8F%92%E4%BB%B6%E5%BC%80%E5%8F%91%E5%AE%9D%E5%85%B8)
+
+# Project Members
+
+Core contributors: 光戈、一斅、祁然、云时
+
+Thanks to 天烬, 巴真, and 静行 for their contributions to DataX.
+
+# License
+
+This software is licensed under the [Apache License 2.0](https://github.com/alibaba/DataX/blob/master/license.txt).
+
+# Issues
+
+Please report issues to us promptly at: [DataX Issues](https://github.com/alibaba/DataX/issues)
+
+# Enterprise Users of Open-Source DataX
+
+![Datax-enterprise-users](https://github.com/alibaba/DataX/blob/master/images/datax-enterprise-users.jpg)
+
+```
+We are hiring! Contact: hanfa.shf@alibaba-inc.com
+[Java Developer Positions]
+Title: Senior Java Engineer / Expert / Senior Expert
+Experience: 2+ years
+Education: Bachelor's degree (negotiable for strong candidates)
+Expected level: P6/P7/P8
+
+Responsibilities:
+    1. Design and develop the Alibaba Cloud big data platform (数加).
+    2. Develop big-data products for government and enterprise customers;
+    3. Apply large-scale machine learning to mine relationships in data and explore product applications of data mining in real scenarios;
+    4. One-stop big-data development platform
+    5. Big-data task scheduling engine
+    6. Task execution engine
+    7. Task monitoring and alerting
+    8. Synchronization of massive heterogeneous data
+
+Requirements:
+    1. 3+ years of Java web development experience;
+    2. Solid grasp of core Java: JVM, class loading, threads, concurrency, I/O resource management, networking;
+    3. Proficient with common Java frameworks and quick to pick up new ones; deep understanding of object orientation, design principles, encapsulation, and abstraction;
+    4. Familiar with HTML/HTML5, JavaScript, and SQL;
+    5. Strong execution, teamwork, and dedication;
+    6. Deep understanding of design patterns and their applications is a plus;
+    7. Strong analytical and hands-on skills and a passion for technology are preferred;
+    8. Real project/product experience with high concurrency, high availability, high performance, or big-data processing is preferred;
+    9. Experience with big-data products, cloud products, or middleware solutions is preferred.
+```
+DingTalk users: please scan the QR code below to join the discussion:
+
+![DataX-OpenSource-Dingding](https://raw.githubusercontent.com/alibaba/DataX/master/images/datax-opensource-dingding.png)
+
+
diff --git a/adswriter/doc/adswriter.md b/adswriter/doc/adswriter.md
new file mode 100644
index 0000000000..f80229bbc7
--- /dev/null
+++ b/adswriter/doc/adswriter.md
@@ -0,0 +1,314 @@
+# DataX ADS Writer
+
+
+---
+
+
+## 1 Quick Introduction
+
+
+
+Welcome, ADS, to the DataX ecosystem! The ADSWriter plugin writes data from other data sources into ADS; with it, every existing DataX data source can be loaded into ADS seamlessly and quickly.
+
+ADSWriter supports two write paths:
+
+* Load mode: the data is staged in ODPS and then loaded into ADS. The advantage is that large volumes (more than ~10 million rows) import quickly; the drawback is that ODPS serves as a staging area, so authentication spans three systems (DataX, ADS, ODPS).
+
+* Insert mode: the data is written directly into ADS. Small batches (fewer than ~10 million rows) complete quickly, but large imports are slow.
+
+
+Note:
+
+> If you import data from ODPS into ADS, grant the ADS Build account read permission on the source table in the source ODPS project in advance; in addition, the creator of the source ODPS table and the ADS write destination must belong to the same Alibaba Cloud account.
+
+> If you import data from a non-ODPS source into ADS, grant the ADS Build account the Load Data permission in the destination ADS database in advance.
+
+For the ADS Build account mentioned above, contact your ADS administrator.
+
+
+## 2 Implementation
+
+ADSWriter supports two write modes:
+
+### 2.1 Load mode
+
+DataX imports the data into a staging table allocated for the current import job, then notifies ADS to load it. Because the load path writes into ADS's distributed storage cluster, this channel has high throughput and supports TB-scale imports.
+
+![Staged import](http://aligitlab.oss-cn-hangzhou-zmf.aliyuncs.com/uploads/cdp/cdp/f805dea46b/_____2015-04-10___12.06.21.png)
+
+1. The CDP layer obtains the plain-text jdbc://host:port/dbname plus username, password, and table, connects to ADS with them, and runs `show grants;` as a pre-check that the user has Load Data (or higher) permission on the target ADS table. Note that at this point ADSWriter authenticates with the ADS username and password supplied by the user.
+
+2. After the check passes, ADSWriter reverse-generates ODPS DDL from the target ADS table's metadata and creates a staging ODPS table (non-partitioned, lifecycle 1-2 days) in the intermediate ODPS project under ADSWriter's account, then invokes ODPSWriter to write the source data into that table.
+
+    Note: writing into the staging ODPS project uses that project's account AK.
+
+3. When the write completes, ADSWriter connects to ADS with the staging ODPS account and issues `LOAD DATA FROM 'odps://staging_project/staging_table/' [OVERWRITE] INTO adsdb.adstable [PARTITION (xx,xx=xx)];`. The command returns a job ID, which must be recorded.
+
+    Note: ADS accesses the staging ODPS project with its own Build account, so the staging project must grant that account read permission in advance.
+
+4. ADSWriter then connects to ADS and polls `select state from information_schema.job_instances where job_id like '$JobId'` once a minute; note that no status record may be visible during the first minute. A JDBC sketch of steps 3 and 4 follows the list.
+
+5. On Success or Fail the result is returned to the user, the staging ODPS table is dropped, and the job ends.
+
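+A minimal JDBC sketch of steps 3 and 4, assuming placeholder connection settings (the real logic lives in AdsHelper.loadData and AdsHelper.checkLoadDataJobStatus):
+
+```java
+import java.sql.*;
+
+public class LoadDataSketch {
+    public static void main(String[] args) throws Exception {
+        // Connect to ADS with the staging ODPS account (placeholder host/credentials).
+        Connection conn = DriverManager.getConnection(
+                "jdbc:mysql://ads-host:3306/adsdb", "user", "password");
+        Statement stmt = conn.createStatement();
+        // Step 3: submit the load job; we assume the job id comes back as a one-cell result set.
+        ResultSet rs = stmt.executeQuery(
+                "LOAD DATA FROM 'odps://staging_project/staging_table/' OVERWRITE INTO adsdb.adstable");
+        rs.next();
+        String jobId = rs.getString(1);
+        // Step 4: poll once a minute until the job reaches a terminal state.
+        boolean terminated = false;
+        while (!terminated) {
+            Thread.sleep(60 * 1000L);
+            ResultSet state = stmt.executeQuery(
+                    "select state from information_schema.job_instances where job_id like '" + jobId + "'");
+            // The state names below are illustrative; during the first minute no row may exist yet.
+            if (state.next()) {
+                String s = state.getString(1);
+                terminated = "SUCCESS".equalsIgnoreCase(s) || "FAIL".equalsIgnoreCase(s);
+            }
+        }
+        conn.close();
+    }
+}
+```
+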
+The flow just described covers imports from non-ODPS data sources; importing from ODPS into ADS uses the following flow instead:
+
+![Direct import](http://aligitlab.oss-cn-hangzhou-zmf.aliyuncs.com/uploads/cdp/cdp/b3a76459d1/_____2015-04-10___12.06.25.png)
+
+### 2.2 Insert mode
+
+DataX connects to ADS directly and writes through the INSERT interface that ADS exposes. This channel has low write throughput and is not suitable for bulk imports. Points to note:
+
+* ADSWriter connects to ADS via JDBC and performs inserts with plain JDBC Statements only. ADS does not support PreparedStatement, so ADSWriter writes one row at a time across multiple threads (see the sketch after this list).
+
+* ADSWriter supports selecting a subset of columns and reordering them; users may configure an explicit column list.
+
+* To keep the load on ADS manageable, we recommend throttling Insert mode to at most 10,000 TPS.
+
+* After all tasks finish writing, the job post step runs a single flush so that the data is updated in ADS as a whole.
+
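+A minimal sketch of such a Statement-based insert, assuming a placeholder connection and table (AdsInsertProxy builds multi-row INSERT strings the same way):
+
+```java
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.Statement;
+
+public class InsertSketch {
+    public static void main(String[] args) throws Exception {
+        Connection conn = DriverManager.getConnection(
+                "jdbc:mysql://ads-host:3306/schema", "user", "password");
+        Statement stmt = conn.createStatement();
+        // Values must be rendered into the SQL text itself; ADS offers no parameter binding.
+        stmt.executeUpdate("insert into schema.table (a, b) values ('DataX', 'test')");
+        stmt.close();
+        conn.close();
+    }
+}
+```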
+
+## 3 Function Description
+
+### 3.1 Sample Configuration
+
+* A job that generates data in memory and imports it into ADS in Load mode:
+
+```
+{
+ "job": {
+ "setting": {
+ "speed": {
+ "channel": 2
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "streamreader",
+ "parameter": {
+ "column": [
+ {
+ "value": "DataX",
+ "type": "string"
+ },
+ {
+ "value": "test",
+ "type": "bytes"
+ }
+ ],
+ "sliceRecordCount": 100000
+ }
+ },
+ "writer": {
+ "name": "adswriter",
+ "parameter": {
+ "odps": {
+ "accessId": "xxx",
+ "accessKey": "xxx",
+ "account": "xxx@aliyun.com",
+ "odpsServer": "xxx",
+ "tunnelServer": "xxx",
+ "accountType": "aliyun",
+ "project": "transfer_project"
+ },
+ "writeMode": "load",
+ "url": "127.0.0.1:3306",
+ "schema": "schema",
+ "table": "table",
+ "username": "username",
+ "password": "password",
+ "partition": "",
+ "lifeCycle": 2,
+ "overWrite": true,
+ }
+ }
+ }
+ ]
+ }
+}
+```
+
+* A job that generates data in memory and imports it into ADS in Insert mode:
+
+```
+{
+ "job": {
+ "setting": {
+ "speed": {
+ "channel": 2
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "streamreader",
+ "parameter": {
+ "column": [
+ {
+ "value": "DataX",
+ "type": "string"
+ },
+ {
+ "value": "test",
+ "type": "bytes"
+ }
+ ],
+ "sliceRecordCount": 100000
+ }
+ },
+ "writer": {
+ "name": "adswriter",
+ "parameter": {
+ "writeMode": "insert",
+ "url": "127.0.0.1:3306",
+ "schema": "schema",
+ "table": "table",
+ "column": ["*"],
+ "username": "username",
+ "password": "password",
+ "partition": "id,ds=2015"
+ }
+ }
+ }
+ ]
+ }
+}
+```
+
+
+
+### 3.2 Parameters (user configuration spec)
+
+* **url**
+
+	* Description: ADS connection info in the form "ip:port".
+
+	* Required: yes
+
+	* Default: none
+
+* **schema**
+
+	* Description: the ADS schema name.
+
+	* Required: yes
+
+	* Default: none
+
+* **username**
+
+	* Description: the ADS username; currently this is the accessId.
+
+	* Required: yes
+
+	* Default: none
+
+* **password**
+
+	* Description: the ADS password; currently this is the accessKey.
+
+	* Required: yes
+
+	* Default: none
+
+* **table**
+
+	* Description: name of the destination table.
+
+	* Required: yes
+
+	* Default: none
+
+
+* **partition**
+
+	* Description: partition name in the destination table; required when the destination table is partitioned.
+
+	* Required: no
+
+	* Default: none
+
+* **writeMode**
+
+	* Description: the write mode; Load and Insert are supported.
+
+	* Required: yes
+
+	* Default: none
+
+* **column**
+
+	* Description: list of destination columns; either ["*"] or an explicit list such as ["a", "b", "c"].
+
+	* Required: yes
+
+	* Default: none
+
+* **overWrite**
+
+	* Description: whether the write overwrites the destination table; true overwrites, false appends. Only takes effect when writeMode is Load.
+
+	* Required: yes
+
+	* Default: none
+
+
+* **lifeCycle**
+
+	* Description: lifecycle of the staging ODPS table. Only takes effect when writeMode is Load.
+
+	* Required: yes
+
+	* Default: none
+
+* **batchSize**
+
+	* Description: number of rows submitted to ADS per write batch. Only takes effect when writeMode is Insert.
+
+	* Required: only meaningful when writeMode is Insert
+
+	* Default: 32
+
+* **bufferSize**
+
+	* Description: size of DataX's collection buffer. The buffer accumulates a larger window of source records and sorts them by the ADS partition column before submitting, because partition-ordered data is friendlier to the ADS server. The buffered records are then flushed to ADS in chunks of batchSize, so if you set bufferSize, make it a multiple of batchSize. Only takes effect when writeMode is Insert. A sketch of this flow follows.
+
+	* Required: only meaningful when writeMode is Insert
+
+	* Default: unset; the feature is disabled unless configured
+
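+A minimal sketch of the bufferSize/batchSize interaction described above (the real logic is AdsInsertProxy.doBatchRecordWithPartitionSort; partitionOf is a hypothetical stand-in for ADS's hash partition function):
+
+```java
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+public class BufferSketch {
+
+    /** Sort the buffered rows by partition, then flush them in batchSize chunks. */
+    static void flush(List<String[]> buffer, final int partitionIndex, int batchSize) {
+        Collections.sort(buffer, new Comparator<String[]>() {
+            @Override
+            public int compare(String[] r1, String[] r2) {
+                return partitionOf(r1[partitionIndex]) - partitionOf(r2[partitionIndex]);
+            }
+        });
+        for (int i = 0; i < buffer.size(); i += batchSize) {
+            List<String[]> batch = buffer.subList(i, Math.min(i + batchSize, buffer.size()));
+            // submit one multi-row INSERT for this batch
+            System.out.println("flushing " + batch.size() + " rows");
+        }
+    }
+
+    /** Hypothetical stand-in for the ADS hash partition function (at most 256 partitions). */
+    static int partitionOf(String value) {
+        return (value.hashCode() & Integer.MAX_VALUE) % 256;
+    }
+}
+```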
+
+### 3.3 Type Conversion
+
+| DataX internal type | ADS data type |
+| -------- | ----- |
+| Long | int, tinyint, smallint, bigint |
+| Double | float, double, decimal |
+| String | varchar |
+| Date | date |
+| Boolean | bool |
+| Bytes | none |
+
+Note:
+
+* ADS also has a multivalue type; DataX support for it is still to be determined.
+
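+A small usage sketch of the plugin's ColumnDataType helper (introduced later in this change), which performs the JDBC-side half of this mapping:
+
+```java
+import java.sql.Types;
+import com.alibaba.datax.plugin.writer.adswriter.ads.ColumnDataType;
+
+public class TypeMappingSketch {
+    public static void main(String[] args) {
+        // VARCHAR maps to the internal STRING type, whose Java class is java.lang.String.
+        int t = ColumnDataType.convertSQLTypeToValueType(Types.VARCHAR);
+        System.out.println(ColumnDataType.getTypeClassName(t));
+    }
+}
+```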
+
+## 4 Plugin Constraints
+
+When the reader is ODPS and ADSWriter runs in Load mode, the ODPS partition setting supports only the following three forms (two-level partitioning as the example):
+```
+"partition":["pt=*,ds=*"] (read all partitions of the table)
+"partition":["pt=1,ds=*"] (read every second-level partition under first-level partition pt=1)
+"partition":["pt=1,ds=hangzhou"] (read second-level partition ds=hangzhou under first-level partition pt=1)
+```
+
+## 5 Performance (measured in production)
+
+### 5.1 Environment
+
+### 5.2 Test Report
+
+## 6 FAQ
diff --git a/adswriter/pom.xml b/adswriter/pom.xml
new file mode 100644
index 0000000000..de407dfeee
--- /dev/null
+++ b/adswriter/pom.xml
@@ -0,0 +1,107 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <groupId>com.alibaba.datax</groupId>
+        <artifactId>datax-all</artifactId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>adswriter</artifactId>
+    <name>adswriter</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-common</artifactId>
+            <version>${datax-project-version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>slf4j-log4j12</artifactId>
+                    <groupId>org.slf4j</groupId>
+                </exclusion>
+                <exclusion>
+                    <groupId>mysql</groupId>
+                    <artifactId>mysql-connector-java</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-core</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>plugin-rdbms-util</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-exec</artifactId>
+            <version>1.3</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>odpswriter</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>mysql</groupId>
+            <artifactId>mysql-connector-java</artifactId>
+            <version>5.1.31</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-configuration</groupId>
+            <artifactId>commons-configuration</artifactId>
+            <version>1.10</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>1.6</source>
+                    <target>1.6</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <descriptors>
+                        <descriptor>src/main/assembly/package.xml</descriptor>
+                    </descriptors>
+                    <finalName>datax</finalName>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>dwzip</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/adswriter/src/main/assembly/package.xml b/adswriter/src/main/assembly/package.xml
new file mode 100644
index 0000000000..c1fb64bb84
--- /dev/null
+++ b/adswriter/src/main/assembly/package.xml
@@ -0,0 +1,36 @@
+<assembly
+        xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+    <id></id>
+    <formats>
+        <format>dir</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>src/main/resources</directory>
+            <includes>
+                <include>plugin.json</include>
+                <include>config.properties</include>
+                <include>plugin_job_template.json</include>
+            </includes>
+            <outputDirectory>plugin/writer/adswriter</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>target/</directory>
+            <includes>
+                <include>adswriter-0.0.1-SNAPSHOT.jar</include>
+            </includes>
+            <outputDirectory>plugin/writer/adswriter</outputDirectory>
+        </fileSet>
+    </fileSets>
+
+    <dependencySets>
+        <dependencySet>
+            <useProjectArtifact>false</useProjectArtifact>
+            <outputDirectory>plugin/writer/adswriter/libs</outputDirectory>
+            <scope>runtime</scope>
+        </dependencySet>
+    </dependencySets>
+</assembly>
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsException.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsException.java
new file mode 100644
index 0000000000..f0d6f92894
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsException.java
@@ -0,0 +1,40 @@
+package com.alibaba.datax.plugin.writer.adswriter;
+
+public class AdsException extends Exception {
+
+ private static final long serialVersionUID = 1080618043484079794L;
+
+ public final static int ADS_CONN_URL_NOT_SET = -100;
+ public final static int ADS_CONN_USERNAME_NOT_SET = -101;
+ public final static int ADS_CONN_PASSWORD_NOT_SET = -102;
+ public final static int ADS_CONN_SCHEMA_NOT_SET = -103;
+
+ public final static int JOB_NOT_EXIST = -200;
+ public final static int JOB_FAILED = -201;
+
+ public final static int ADS_LOADDATA_SCHEMA_NULL = -300;
+ public final static int ADS_LOADDATA_TABLE_NULL = -301;
+ public final static int ADS_LOADDATA_SOURCEPATH_NULL = -302;
+ public final static int ADS_LOADDATA_JOBID_NOT_AVAIL = -303;
+ public final static int ADS_LOADDATA_FAILED = -304;
+
+ public final static int ADS_TABLEMETA_SCHEMA_NULL = -404;
+ public final static int ADS_TABLEMETA_TABLE_NULL = -405;
+
+ public final static int OTHER = -999;
+
+ private int code = OTHER;
+ private String message;
+
+ public AdsException(int code, String message, Throwable e) {
+ super(message, e);
+ this.code = code;
+ this.message = message;
+ }
+
+ @Override
+ public String getMessage() {
+ return "Code=" + this.code + " Message=" + this.message;
+ }
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriter.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriter.java
new file mode 100644
index 0000000000..7e04c844a5
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriter.java
@@ -0,0 +1,388 @@
+package com.alibaba.datax.plugin.writer.adswriter;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.plugin.TaskPluginCollector;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.alibaba.datax.plugin.rdbms.writer.util.WriterUtil;
+import com.alibaba.datax.plugin.writer.adswriter.ads.ColumnInfo;
+import com.alibaba.datax.plugin.writer.adswriter.ads.TableInfo;
+import com.alibaba.datax.plugin.writer.adswriter.insert.AdsInsertProxy;
+import com.alibaba.datax.plugin.writer.adswriter.insert.AdsInsertUtil;
+import com.alibaba.datax.plugin.writer.adswriter.load.AdsHelper;
+import com.alibaba.datax.plugin.writer.adswriter.load.TableMetaHelper;
+import com.alibaba.datax.plugin.writer.adswriter.load.TransferProjectConf;
+import com.alibaba.datax.plugin.writer.adswriter.odps.TableMeta;
+import com.alibaba.datax.plugin.writer.adswriter.util.AdsUtil;
+import com.alibaba.datax.plugin.writer.adswriter.util.Constant;
+import com.alibaba.datax.plugin.writer.adswriter.util.Key;
+import com.alibaba.datax.plugin.writer.odpswriter.OdpsWriter;
+import com.aliyun.odps.Instance;
+import com.aliyun.odps.Odps;
+import com.aliyun.odps.OdpsException;
+import com.aliyun.odps.account.Account;
+import com.aliyun.odps.account.AliyunAccount;
+import com.aliyun.odps.task.SQLTask;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+public class AdsWriter extends Writer {
+
+ public static class Job extends Writer.Job {
+ private static final Logger LOG = LoggerFactory.getLogger(Writer.Job.class);
+ public final static String ODPS_READER = "odpsreader";
+
+ private OdpsWriter.Job odpsWriterJobProxy = new OdpsWriter.Job();
+ private Configuration originalConfig;
+ private Configuration readerConfig;
+
+ /**
+ * AdsHelper that holds the ADS account
+ */
+ private AdsHelper adsHelper;
+ /**
+ * AdsHelper that holds the ODPS account
+ */
+ private AdsHelper odpsAdsHelper;
+ /**
+ * Configuration of the staging ODPS project, i.e. the parameter.odps section of the writer config
+ */
+ private TransferProjectConf transProjConf;
+ private final int ODPSOVERTIME = 120000;
+ private String odpsTransTableName;
+
+ private String writeMode;
+ private long startTime;
+
+ @Override
+ public void init() {
+ startTime = System.currentTimeMillis();
+ this.originalConfig = super.getPluginJobConf();
+ this.writeMode = this.originalConfig.getString(Key.WRITE_MODE);
+ if(null == this.writeMode) {
+ LOG.warn("您未指定[writeMode]参数, 默认采用load模式, load模式只能用于离线表");
+ this.writeMode = Constant.LOADMODE;
+ this.originalConfig.set(Key.WRITE_MODE, "load");
+ }
+
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ AdsUtil.checkNecessaryConfig(this.originalConfig, this.writeMode);
+ loadModeInit();
+ } else if(Constant.INSERTMODE.equalsIgnoreCase(this.writeMode) || Constant.STREAMMODE.equalsIgnoreCase(this.writeMode)) {
+ AdsUtil.checkNecessaryConfig(this.originalConfig, this.writeMode);
+ List<String> allColumns = AdsInsertUtil.getAdsTableColumnNames(originalConfig);
+ AdsInsertUtil.dealColumnConf(originalConfig, allColumns);
+
+ LOG.debug("After job init(), originalConfig now is:[\n{}\n]",
+ originalConfig.toJSON());
+ } else {
+ throw DataXException.asDataXException(AdsWriterErrorCode.INVALID_CONFIG_VALUE, "writeMode 必须为 'load' 或者 'insert' 或者 'stream'");
+ }
+ }
+
+ private void loadModeInit() {
+ this.adsHelper = AdsUtil.createAdsHelper(this.originalConfig);
+ this.odpsAdsHelper = AdsUtil.createAdsHelperWithOdpsAccount(this.originalConfig);
+ this.transProjConf = TransferProjectConf.create(this.originalConfig);
+ // log the permission-granting workflow
+ LOG.info(String
+ .format("%s%n%s%n%s",
+ "如果您直接是odps->ads数据同步, 需要做2方面授权:",
+ "[1] ads官方账号至少需要有待同步表的describe和select权限, 因为ads系统需要获取odps待同步表的结构和数据信息",
+ "[2] 您配置的ads数据源访问账号ak, 需要有向指定的ads数据库发起load data的权限, 您可以在ads系统中添加授权"));
+ LOG.info(String
+ .format("%s%s%n%s%n%s",
+ "如果您直接是rds(或其它非odps数据源)->ads数据同步, 流程是先将数据装载如odps临时表,再从odps临时表->ads, ",
+ String.format("中转odps项目为%s,中转项目账号为%s, 权限方面:",
+ this.transProjConf.getProject(),
+ this.transProjConf.getAccount()),
+ "[1] ads官方账号至少需要有待同步表(这里是odps临时表)的describe和select权限, 因为ads系统需要获取odps待同步表的结构和数据信息,此部分部署时已经完成授权",
+ String.format("[2] 中转odps对应的账号%s, 需要有向指定的ads数据库发起load data的权限, 您可以在ads系统中添加授权", this.transProjConf.getAccount())));
+
+ /**
+ * If importing from ODPS into ADS, issue LOAD DATA directly and then System.exit()
+ */
+ if (super.getPeerPluginName().equals(ODPS_READER)) {
+ transferFromOdpsAndExit();
+ }
+ Account odpsAccount;
+ odpsAccount = new AliyunAccount(transProjConf.getAccessId(), transProjConf.getAccessKey());
+
+ Odps odps = new Odps(odpsAccount);
+ odps.setEndpoint(transProjConf.getOdpsServer());
+ odps.setDefaultProject(transProjConf.getProject());
+
+ TableMeta tableMeta;
+ try {
+ String adsTable = this.originalConfig.getString(Key.ADS_TABLE);
+ TableInfo tableInfo = adsHelper.getTableInfo(adsTable);
+ int lifeCycle = this.originalConfig.getInt(Key.Life_CYCLE);
+ tableMeta = TableMetaHelper.createTempODPSTable(tableInfo, lifeCycle);
+ this.odpsTransTableName = tableMeta.getTableName();
+ String sql = tableMeta.toDDL();
+ LOG.info("正在创建ODPS临时表: "+sql);
+ Instance instance = SQLTask.run(odps, transProjConf.getProject(), sql, null, null);
+ boolean terminated = false;
+ int time = 0;
+ while (!terminated && time < ODPSOVERTIME) {
+ Thread.sleep(1000);
+ terminated = instance.isTerminated();
+ time += 1000;
+ }
+ LOG.info("正在创建ODPS临时表成功");
+ } catch (AdsException e) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.ODPS_CREATETABLE_FAILED, e);
+ }catch (OdpsException e) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.ODPS_CREATETABLE_FAILED,e);
+ } catch (InterruptedException e) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.ODPS_CREATETABLE_FAILED,e);
+ }
+
+ Configuration newConf = AdsUtil.generateConf(this.originalConfig, this.odpsTransTableName,
+ tableMeta, this.transProjConf);
+ odpsWriterJobProxy.setPluginJobConf(newConf);
+ odpsWriterJobProxy.init();
+ }
+
+ /**
+ * When the reader is ODPS, call the ADS load interface directly and exit when done.
+ * In that case only part of the parameters configured in the odps reader take effect;
+ * its accessId and accessKey are ignored.
+ */
+ private void transferFromOdpsAndExit() {
+ this.readerConfig = super.getPeerPluginJobConf();
+ String odpsTableName = this.readerConfig.getString(Key.ODPSTABLENAME);
+ List<String> userConfiguredPartitions = this.readerConfig.getList(Key.PARTITION, String.class);
+
+ if (userConfiguredPartitions == null) {
+ userConfiguredPartitions = Collections.emptyList();
+ }
+
+ if(userConfiguredPartitions.size() > 1)
+ throw DataXException.asDataXException(AdsWriterErrorCode.ODPS_PARTITION_FAILED, "");
+
+ if(userConfiguredPartitions.size() == 0) {
+ loadAdsData(adsHelper, odpsTableName,null);
+ }else {
+ loadAdsData(adsHelper, odpsTableName,userConfiguredPartitions.get(0));
+ }
+ System.exit(0);
+ }
+
+ // generally pre execution should be deferred into the task (the single-table case is the exception)
+ @Override
+ public void prepare() {
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ //load the data into the odps table
+ this.odpsWriterJobProxy.prepare();
+ } else {
+ // real-time table mode, no sharding
+ String adsTable = this.originalConfig.getString(Key.ADS_TABLE);
+ List<String> preSqls = this.originalConfig.getList(Key.PRE_SQL,
+ String.class);
+ List<String> renderedPreSqls = WriterUtil.renderPreOrPostSqls(
+ preSqls, adsTable);
+ if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) {
+ // preSql is configured, so remove it here
+ this.originalConfig.remove(Key.PRE_SQL);
+ Connection preConn = AdsUtil.getAdsConnect(this.originalConfig);
+ LOG.info("Begin to execute preSqls:[{}]. context info:{}.",
+ StringUtils.join(renderedPreSqls, ";"),
+ this.originalConfig.getString(Key.ADS_URL));
+ WriterUtil.executeSqls(preConn, renderedPreSqls,
+ this.originalConfig.getString(Key.ADS_URL),
+ DataBaseType.ADS);
+ DBUtil.closeDBResources(null, null, preConn);
+ }
+ }
+ }
+
+ @Override
+ public List<Configuration> split(int mandatoryNumber) {
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ return this.odpsWriterJobProxy.split(mandatoryNumber);
+ } else {
+ List<Configuration> splitResult = new ArrayList<Configuration>();
+ for(int i = 0; i < mandatoryNumber; i++) {
+ splitResult.add(this.originalConfig.clone());
+ }
+ return splitResult;
+ }
+ }
+
+ // generally post execution should be deferred into the task (the single-table case is the exception)
+ @Override
+ public void post() {
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ loadAdsData(odpsAdsHelper, this.odpsTransTableName, null);
+ this.odpsWriterJobProxy.post();
+ } else {
+ // real-time table mode, no sharding
+ String adsTable = this.originalConfig.getString(Key.ADS_TABLE);
+ List<String> postSqls = this.originalConfig.getList(
+ Key.POST_SQL, String.class);
+ List<String> renderedPostSqls = WriterUtil.renderPreOrPostSqls(
+ postSqls, adsTable);
+ if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) {
+ // postSql is configured, so remove it here
+ this.originalConfig.remove(Key.POST_SQL);
+ Connection postConn = AdsUtil.getAdsConnect(this.originalConfig);
+ LOG.info(
+ "Begin to execute postSqls:[{}]. context info:{}.",
+ StringUtils.join(renderedPostSqls, ";"),
+ this.originalConfig.getString(Key.ADS_URL));
+ WriterUtil.executeSqls(postConn, renderedPostSqls,
+ this.originalConfig.getString(Key.ADS_URL),
+ DataBaseType.ADS);
+ DBUtil.closeDBResources(null, null, postConn);
+ }
+ }
+ }
+
+ @Override
+ public void destroy() {
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ this.odpsWriterJobProxy.destroy();
+ } else {
+ //insert mode: do nothing
+ }
+ }
+
+ private void loadAdsData(AdsHelper helper, String odpsTableName, String odpsPartition) {
+
+ String table = this.originalConfig.getString(Key.ADS_TABLE);
+ String project;
+ if (super.getPeerPluginName().equals(ODPS_READER)) {
+ project = this.readerConfig.getString(Key.PROJECT);
+ } else {
+ project = this.transProjConf.getProject();
+ }
+ String partition = this.originalConfig.getString(Key.PARTITION);
+ String sourcePath = AdsUtil.generateSourcePath(project,odpsTableName,odpsPartition);
+ /**
+ * Checked earlier, so unboxing here cannot throw an NPE
+ */
+ boolean overwrite = this.originalConfig.getBool(Key.OVER_WRITE);
+ try {
+ String id = helper.loadData(table,partition,sourcePath,overwrite);
+ LOG.info("ADS Load Data任务已经提交,job id: " + id);
+ boolean terminated = false;
+ int time = 0;
+ while(!terminated) {
+ Thread.sleep(120000);
+ terminated = helper.checkLoadDataJobStatus(id);
+ time += 2;
+ LOG.info("ADS 正在导数据中,整个过程需要20分钟以上,请耐心等待,目前已执行 "+ time+" 分钟");
+ }
+ LOG.info("ADS 导数据已成功");
+ } catch (AdsException e) {
+ if (super.getPeerPluginName().equals(ODPS_READER)) {
+ // TODO use the cloud account
+ AdsWriterErrorCode.ADS_LOAD_ODPS_FAILED.setAdsAccount(helper.getUserName());
+ throw DataXException.asDataXException(AdsWriterErrorCode.ADS_LOAD_ODPS_FAILED,e);
+ } else {
+ throw DataXException.asDataXException(AdsWriterErrorCode.ADS_LOAD_TEMP_ODPS_FAILED,e);
+ }
+ } catch (InterruptedException e) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.ODPS_CREATETABLE_FAILED,e);
+ }
+ }
+ }
+
+ public static class Task extends Writer.Task {
+ private static final Logger LOG = LoggerFactory.getLogger(Writer.Task.class);
+ private Configuration writerSliceConfig;
+ private OdpsWriter.Task odpsWriterTaskProxy = new OdpsWriter.Task();
+
+
+ private String writeMode;
+ private String schema;
+ private String table;
+ private int columnNumber;
+ // warn: only set in insert/stream mode; in load mode this refers to the staging odps table
+ private TableInfo tableInfo;
+
+ @Override
+ public void init() {
+ writerSliceConfig = super.getPluginJobConf();
+ this.writeMode = this.writerSliceConfig.getString(Key.WRITE_MODE);
+ this.schema = writerSliceConfig.getString(Key.SCHEMA);
+ this.table = writerSliceConfig.getString(Key.ADS_TABLE);
+
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ odpsWriterTaskProxy.setPluginJobConf(writerSliceConfig);
+ odpsWriterTaskProxy.init();
+ } else if(Constant.INSERTMODE.equalsIgnoreCase(this.writeMode) || Constant.STREAMMODE.equalsIgnoreCase(this.writeMode)) {
+ try {
+ this.tableInfo = AdsUtil.createAdsHelper(this.writerSliceConfig).getTableInfo(this.table);
+ } catch (AdsException e) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.CREATE_ADS_HELPER_FAILED, e);
+ }
+ List<String> allColumns = new ArrayList<String>();
+ List<ColumnInfo> columnInfo = this.tableInfo.getColumns();
+ for (ColumnInfo eachColumn : columnInfo) {
+ allColumns.add(eachColumn.getName());
+ }
+ LOG.info("table:[{}] all columns:[\n{}\n].", this.writerSliceConfig.get(Key.ADS_TABLE), StringUtils.join(allColumns, ","));
+ AdsInsertUtil.dealColumnConf(writerSliceConfig, allColumns);
+ List<String> userColumns = writerSliceConfig.getList(Key.COLUMN, String.class);
+ this.columnNumber = userColumns.size();
+ } else {
+ throw DataXException.asDataXException(AdsWriterErrorCode.INVALID_CONFIG_VALUE, "writeMode 必须为 'load' 或者 'insert' 或者 'stream'");
+ }
+ }
+
+ @Override
+ public void prepare() {
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ odpsWriterTaskProxy.prepare();
+ } else {
+ //do nothing
+ }
+ }
+
+ public void startWrite(RecordReceiver recordReceiver) {
+ // here a non-odps source is synced into the staging odps table; the load happens in the job post phase
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ odpsWriterTaskProxy.setTaskPluginCollector(super.getTaskPluginCollector());
+ odpsWriterTaskProxy.startWrite(recordReceiver);
+ } else {
+ // insert 模式
+ List<String> columns = writerSliceConfig.getList(Key.COLUMN, String.class);
+ Connection connection = AdsUtil.getAdsConnect(this.writerSliceConfig);
+ TaskPluginCollector taskPluginCollector = super.getTaskPluginCollector();
+ AdsInsertProxy proxy = new AdsInsertProxy(schema + "." + table, columns, writerSliceConfig, taskPluginCollector, this.tableInfo);
+ proxy.startWriteWithConnection(recordReceiver, connection, columnNumber);
+ }
+ }
+
+ @Override
+ public void post() {
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ odpsWriterTaskProxy.post();
+ } else {
+ //do nothing until now
+ }
+ }
+
+ @Override
+ public void destroy() {
+ if(Constant.LOADMODE.equalsIgnoreCase(this.writeMode)) {
+ odpsWriterTaskProxy.destroy();
+ } else {
+ //do nothing until now
+ }
+ }
+ }
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriterErrorCode.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriterErrorCode.java
new file mode 100644
index 0000000000..a1ac3c107a
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriterErrorCode.java
@@ -0,0 +1,54 @@
+package com.alibaba.datax.plugin.writer.adswriter;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+
+public enum AdsWriterErrorCode implements ErrorCode {
+ REQUIRED_VALUE("AdsWriter-00", "您缺失了必须填写的参数值."),
+ NO_ADS_TABLE("AdsWriter-01", "ADS表不存在."),
+ ODPS_CREATETABLE_FAILED("AdsWriter-02", "创建ODPS临时表失败,请联系ADS 技术支持"),
+ ADS_LOAD_TEMP_ODPS_FAILED("AdsWriter-03", "ADS从ODPS临时表导数据失败,请联系ADS 技术支持"),
+ TABLE_TRUNCATE_ERROR("AdsWriter-04", "清空 ODPS 目的表时出错."),
+ CREATE_ADS_HELPER_FAILED("AdsWriter-05", "创建ADSHelper对象出错,请联系ADS 技术支持"),
+ ODPS_PARTITION_FAILED("AdsWriter-06", "ODPS Reader不允许配置多个partition,目前只支持三种配置方式,\"partition\":[\"pt=*,ds=*\"](读取test表所有分区的数据); \n" +
+ "\"partition\":[\"pt=1,ds=*\"](读取test表下面,一级分区pt=1下面的所有二级分区); \n" +
+ "\"partition\":[\"pt=1,ds=hangzhou\"](读取test表下面,一级分区pt=1下面,二级分区ds=hz的数据)"),
+ ADS_LOAD_ODPS_FAILED("AdsWriter-07", "ADS从ODPS导数据失败,请联系ADS 技术支持,先检查ADS账号是否已加到该ODPS Project中。ADS账号为:"),
+ INVALID_CONFIG_VALUE("AdsWriter-08", "不合法的配置值."),
+
+ GET_ADS_TABLE_MEATA_FAILED("AdsWriter-11", "获取ADS table原信息失败");
+
+ private final String code;
+ private final String description;
+ private String adsAccount;
+
+
+ private AdsWriterErrorCode(String code, String description) {
+ this.code = code;
+ this.description = description;
+ }
+
+ public void setAdsAccount(String adsAccount) {
+ this.adsAccount = adsAccount;
+ }
+
+ @Override
+ public String getCode() {
+ return this.code;
+ }
+
+ @Override
+ public String getDescription() {
+ return this.description;
+ }
+
+ @Override
+ public String toString() {
+ if (this.code.equals("AdsWriter-07")){
+ return String.format("Code:[%s], Description:[%s][%s]. ", this.code,
+ this.description,adsAccount);
+ }else{
+ return String.format("Code:[%s], Description:[%s]. ", this.code,
+ this.description);
+ }
+ }
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/ColumnDataType.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/ColumnDataType.java
new file mode 100644
index 0000000000..d719c318bf
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/ColumnDataType.java
@@ -0,0 +1,406 @@
+package com.alibaba.datax.plugin.writer.adswriter.ads;
+
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * ADS column data type.
+ *
+ * @since 0.0.1
+ */
+public class ColumnDataType {
+
+ // public static final int NULL = 0;
+ public static final int BOOLEAN = 1;
+ public static final int BYTE = 2;
+ public static final int SHORT = 3;
+ public static final int INT = 4;
+ public static final int LONG = 5;
+ public static final int DECIMAL = 6;
+ public static final int DOUBLE = 7;
+ public static final int FLOAT = 8;
+ public static final int TIME = 9;
+ public static final int DATE = 10;
+ public static final int TIMESTAMP = 11;
+ public static final int STRING = 13;
+ // public static final int STRING_IGNORECASE = 14;
+ // public static final int STRING_FIXED = 21;
+
+ public static final int MULTI_VALUE = 22;
+
+ public static final int TYPE_COUNT = MULTI_VALUE + 1;
+
+ /**
+ * The list of types. An ArrayList so that Tomcat doesn't set it to null when clearing references.
+ */
+ private static final ArrayList<ColumnDataType> TYPES = new ArrayList<ColumnDataType>();
+ private static final HashMap<String, ColumnDataType> TYPES_BY_NAME = new HashMap<String, ColumnDataType>();
+ private static final ArrayList<ColumnDataType> TYPES_BY_VALUE_TYPE = new ArrayList<ColumnDataType>();
+
+ /**
+ * Get the data type names for the given value types.
+ *
+ * @param dataTypes the value types
+ * @return the type names as a list string
+ */
+ public static String getNames(int[] dataTypes) {
+ List<String> names = new ArrayList<String>(dataTypes.length);
+ for (final int dataType : dataTypes) {
+ names.add(ColumnDataType.getDataType(dataType).name);
+ }
+ return names.toString();
+ }
+
+ public int type;
+ public String name;
+ public int sqlType;
+ public String jdbc;
+
+ /**
+ * How closely the data type maps to the corresponding JDBC SQL type (low is best).
+ */
+ public int sqlTypePos;
+
+ static {
+ for (int i = 0; i < TYPE_COUNT; i++) {
+ TYPES_BY_VALUE_TYPE.add(null);
+ }
+ // add(NULL, Types.NULL, "Null", new String[] { "NULL" });
+ add(STRING, Types.VARCHAR, "String", new String[] { "VARCHAR", "VARCHAR2", "NVARCHAR", "NVARCHAR2",
+ "VARCHAR_CASESENSITIVE", "CHARACTER VARYING", "TID" });
+ add(STRING, Types.LONGVARCHAR, "String", new String[] { "LONGVARCHAR", "LONGNVARCHAR" });
+ // add(STRING_FIXED, Types.CHAR, "String", new String[] { "CHAR", "CHARACTER", "NCHAR" });
+ // add(STRING_IGNORECASE, Types.VARCHAR, "String", new String[] { "VARCHAR_IGNORECASE" });
+ add(BOOLEAN, Types.BOOLEAN, "Boolean", new String[] { "BOOLEAN", "BIT", "BOOL" });
+ add(BYTE, Types.TINYINT, "Byte", new String[] { "TINYINT" });
+ add(SHORT, Types.SMALLINT, "Short", new String[] { "SMALLINT", "YEAR", "INT2" });
+ add(INT, Types.INTEGER, "Int", new String[] { "INTEGER", "INT", "MEDIUMINT", "INT4", "SIGNED" });
+ add(INT, Types.INTEGER, "Int", new String[] { "SERIAL" });
+ add(LONG, Types.BIGINT, "Long", new String[] { "BIGINT", "INT8", "LONG" });
+ add(LONG, Types.BIGINT, "Long", new String[] { "IDENTITY", "BIGSERIAL" });
+ add(DECIMAL, Types.DECIMAL, "BigDecimal", new String[] { "DECIMAL", "DEC" });
+ add(DECIMAL, Types.NUMERIC, "BigDecimal", new String[] { "NUMERIC", "NUMBER" });
+ add(FLOAT, Types.REAL, "Float", new String[] { "REAL", "FLOAT4" });
+ add(DOUBLE, Types.DOUBLE, "Double", new String[] { "DOUBLE", "DOUBLE PRECISION" });
+ add(DOUBLE, Types.FLOAT, "Double", new String[] { "FLOAT", "FLOAT8" });
+ add(TIME, Types.TIME, "Time", new String[] { "TIME" });
+ add(DATE, Types.DATE, "Date", new String[] { "DATE" });
+ add(TIMESTAMP, Types.TIMESTAMP, "Timestamp", new String[] { "TIMESTAMP", "DATETIME", "SMALLDATETIME" });
+ add(MULTI_VALUE, Types.VARCHAR, "String", new String[] { "MULTIVALUE" });
+ }
+
+ private static void add(int type, int sqlType, String jdbc, String[] names) {
+ for (int i = 0; i < names.length; i++) {
+ ColumnDataType dt = new ColumnDataType();
+ dt.type = type;
+ dt.sqlType = sqlType;
+ dt.jdbc = jdbc;
+ dt.name = names[i];
+ for (ColumnDataType t2 : TYPES) {
+ if (t2.sqlType == dt.sqlType) {
+ dt.sqlTypePos++;
+ }
+ }
+ TYPES_BY_NAME.put(dt.name, dt);
+ if (TYPES_BY_VALUE_TYPE.get(type) == null) {
+ TYPES_BY_VALUE_TYPE.set(type, dt);
+ }
+ TYPES.add(dt);
+ }
+ }
+
+ /**
+ * Get the list of data types.
+ *
+ * @return the list
+ */
+ public static ArrayList<ColumnDataType> getTypes() {
+ return TYPES;
+ }
+
+ /**
+ * Get the name of the Java class for the given value type.
+ *
+ * @param type the value type
+ * @return the class name
+ */
+ public static String getTypeClassName(int type) {
+ switch (type) {
+ case BOOLEAN:
+ // "java.lang.Boolean";
+ return Boolean.class.getName();
+ case BYTE:
+ // "java.lang.Byte";
+ return Byte.class.getName();
+ case SHORT:
+ // "java.lang.Short";
+ return Short.class.getName();
+ case INT:
+ // "java.lang.Integer";
+ return Integer.class.getName();
+ case LONG:
+ // "java.lang.Long";
+ return Long.class.getName();
+ case DECIMAL:
+ // "java.math.BigDecimal";
+ return BigDecimal.class.getName();
+ case TIME:
+ // "java.sql.Time";
+ return Time.class.getName();
+ case DATE:
+ // "java.sql.Date";
+ return Date.class.getName();
+ case TIMESTAMP:
+ // "java.sql.Timestamp";
+ return Timestamp.class.getName();
+ case STRING:
+ // case STRING_IGNORECASE:
+ // case STRING_FIXED:
+ case MULTI_VALUE:
+ // "java.lang.String";
+ return String.class.getName();
+ case DOUBLE:
+ // "java.lang.Double";
+ return Double.class.getName();
+ case FLOAT:
+ // "java.lang.Float";
+ return Float.class.getName();
+ // case NULL:
+ // return null;
+ default:
+ throw new IllegalArgumentException("type=" + type);
+ }
+ }
+
+ /**
+ * Get the data type object for the given value type.
+ *
+ * @param type the value type
+ * @return the data type object
+ */
+ public static ColumnDataType getDataType(int type) {
+ if (type < 0 || type >= TYPE_COUNT) {
+ throw new IllegalArgumentException("type=" + type);
+ }
+ ColumnDataType dt = TYPES_BY_VALUE_TYPE.get(type);
+ // if (dt == null) {
+ // dt = TYPES_BY_VALUE_TYPE.get(NULL);
+ // }
+ return dt;
+ }
+
+ /**
+ * Convert a value type to a SQL type.
+ *
+ * @param type the value type
+ * @return the SQL type
+ */
+ public static int convertTypeToSQLType(int type) {
+ return getDataType(type).sqlType;
+ }
+
+ /**
+ * Convert a SQL type to a value type.
+ *
+ * @param sqlType the SQL type
+ * @return the value type
+ */
+ public static int convertSQLTypeToValueType(int sqlType) {
+ switch (sqlType) {
+ // case Types.CHAR:
+ // case Types.NCHAR:
+ // return STRING_FIXED;
+ case Types.VARCHAR:
+ case Types.LONGVARCHAR:
+ case Types.NVARCHAR:
+ case Types.LONGNVARCHAR:
+ return STRING;
+ case Types.NUMERIC:
+ case Types.DECIMAL:
+ return DECIMAL;
+ case Types.BIT:
+ case Types.BOOLEAN:
+ return BOOLEAN;
+ case Types.INTEGER:
+ return INT;
+ case Types.SMALLINT:
+ return SHORT;
+ case Types.TINYINT:
+ return BYTE;
+ case Types.BIGINT:
+ return LONG;
+ case Types.REAL:
+ return FLOAT;
+ case Types.DOUBLE:
+ case Types.FLOAT:
+ return DOUBLE;
+ case Types.DATE:
+ return DATE;
+ case Types.TIME:
+ return TIME;
+ case Types.TIMESTAMP:
+ return TIMESTAMP;
+ // case Types.NULL:
+ // return NULL;
+ default:
+ throw new IllegalArgumentException("JDBC Type: " + sqlType);
+ }
+ }
+
+ /**
+ * Get the value type for the given Java class.
+ *
+ * @param x the Java class
+ * @return the value type
+ */
+ public static int getTypeFromClass(Class<?> x) {
+ // if (x == null || Void.TYPE == x) {
+ // return NULL;
+ // }
+ if (x.isPrimitive()) {
+ x = getNonPrimitiveClass(x);
+ }
+ if (String.class == x) {
+ return STRING;
+ } else if (Integer.class == x) {
+ return INT;
+ } else if (Long.class == x) {
+ return LONG;
+ } else if (Boolean.class == x) {
+ return BOOLEAN;
+ } else if (Double.class == x) {
+ return DOUBLE;
+ } else if (Byte.class == x) {
+ return BYTE;
+ } else if (Short.class == x) {
+ return SHORT;
+ } else if (Float.class == x) {
+ return FLOAT;
+ // } else if (Void.class == x) {
+ // return NULL;
+ } else if (BigDecimal.class.isAssignableFrom(x)) {
+ return DECIMAL;
+ } else if (Date.class.isAssignableFrom(x)) {
+ return DATE;
+ } else if (Time.class.isAssignableFrom(x)) {
+ return TIME;
+ } else if (Timestamp.class.isAssignableFrom(x)) {
+ return TIMESTAMP;
+ } else if (java.util.Date.class.isAssignableFrom(x)) {
+ return TIMESTAMP;
+ } else {
+ throw new IllegalArgumentException("class=" + x);
+ }
+ }
+
+ /**
+ * Convert primitive class names to java.lang.* class names.
+ *
+ * @param clazz the class (for example: int)
+ * @return the non-primitive class (for example: java.lang.Integer)
+ */
+ public static Class<?> getNonPrimitiveClass(Class<?> clazz) {
+ if (!clazz.isPrimitive()) {
+ return clazz;
+ } else if (clazz == boolean.class) {
+ return Boolean.class;
+ } else if (clazz == byte.class) {
+ return Byte.class;
+ } else if (clazz == char.class) {
+ return Character.class;
+ } else if (clazz == double.class) {
+ return Double.class;
+ } else if (clazz == float.class) {
+ return Float.class;
+ } else if (clazz == int.class) {
+ return Integer.class;
+ } else if (clazz == long.class) {
+ return Long.class;
+ } else if (clazz == short.class) {
+ return Short.class;
+ } else if (clazz == void.class) {
+ return Void.class;
+ }
+ return clazz;
+ }
+
+ /**
+ * Get a data type object from a type name.
+ *
+ * @param s the type name
+ * @return the data type object
+ */
+ public static ColumnDataType getTypeByName(String s) {
+ return TYPES_BY_NAME.get(s);
+ }
+
+ /**
+ * Check if the given value type is a String (VARCHAR,...).
+ *
+ * @param type the value type
+ * @return true if the value type is a String type
+ */
+ public static boolean isStringType(int type) {
+ if (type == STRING /* || type == STRING_FIXED || type == STRING_IGNORECASE */
+ || type == MULTI_VALUE) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Check if this data type supports the add operation.
+ *
+ * @return true if add is supported
+ */
+ public boolean supportsAdd() {
+ return supportsAdd(type);
+ }
+
+ /**
+ * Check if the given value type supports the add operation.
+ *
+ * @param type the value type
+ * @return true if add is supported
+ */
+ public static boolean supportsAdd(int type) {
+ switch (type) {
+ case BYTE:
+ case DECIMAL:
+ case DOUBLE:
+ case FLOAT:
+ case INT:
+ case LONG:
+ case SHORT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /**
+ * Get the data type that will not overflow when calling 'add' 2 billion times.
+ *
+ * @param type the value type
+ * @return the data type that supports adding
+ */
+ public static int getAddProofType(int type) {
+ switch (type) {
+ case BYTE:
+ return LONG;
+ case FLOAT:
+ return DOUBLE;
+ case INT:
+ return LONG;
+ case LONG:
+ return DECIMAL;
+ case SHORT:
+ return LONG;
+ default:
+ return type;
+ }
+ }
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/ColumnInfo.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/ColumnInfo.java
new file mode 100644
index 0000000000..030ce35d10
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/ColumnInfo.java
@@ -0,0 +1,72 @@
+package com.alibaba.datax.plugin.writer.adswriter.ads;
+
+/**
+ * ADS column meta.
+ *
+ * select ordinal_position,column_name,data_type,type_name,column_comment
+ * from information_schema.columns
+ * where table_schema='db_name' and table_name='table_name'
+ * and is_deleted=0
+ * order by ordinal_position limit 1000
+ *
+ *
+ * @since 0.0.1
+ */
+public class ColumnInfo {
+
+ private int ordinal;
+ private String name;
+ private ColumnDataType dataType;
+ private boolean isDeleted;
+ private String comment;
+
+ public int getOrdinal() {
+ return ordinal;
+ }
+
+ public void setOrdinal(int ordinal) {
+ this.ordinal = ordinal;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public ColumnDataType getDataType() {
+ return dataType;
+ }
+
+ public void setDataType(ColumnDataType dataType) {
+ this.dataType = dataType;
+ }
+
+ public boolean isDeleted() {
+ return isDeleted;
+ }
+
+ public void setDeleted(boolean isDeleted) {
+ this.isDeleted = isDeleted;
+ }
+
+ public String getComment() {
+ return comment;
+ }
+
+ public void setComment(String comment) {
+ this.comment = comment;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ builder.append("ColumnInfo [ordinal=").append(ordinal).append(", name=").append(name).append(", dataType=")
+ .append(dataType).append(", isDeleted=").append(isDeleted).append(", comment=").append(comment)
+ .append("]");
+ return builder.toString();
+ }
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/TableInfo.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/TableInfo.java
new file mode 100644
index 0000000000..eac324d1fd
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/TableInfo.java
@@ -0,0 +1,135 @@
+package com.alibaba.datax.plugin.writer.adswriter.ads;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * ADS table meta.
+ *
+ * select table_schema, table_name,comments
+ * from information_schema.tables
+ * where table_schema='alimama' and table_name='click_af' limit 1
+ *
+ *
+ * select ordinal_position,column_name,data_type,type_name,column_comment
+ * from information_schema.columns
+ * where table_schema='db_name' and table_name='table_name'
+ * and is_deleted=0
+ * order by ordinal_position limit 1000
+ *
+ *
+ * @since 0.0.1
+ */
+public class TableInfo {
+
+ private String tableSchema;
+ private String tableName;
+ private List<ColumnInfo> columns;
+ private String comments;
+ private String tableType;
+
+ private String updateType;
+ private String partitionType;
+ private String partitionColumn;
+ private int partitionCount;
+ private List<String> primaryKeyColumns;
+
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ builder.append("TableInfo [tableSchema=").append(tableSchema).append(", tableName=").append(tableName)
+ .append(", columns=").append(columns).append(", comments=").append(comments).append(",updateType=").append(updateType)
+ .append(",partitionType=").append(partitionType).append(",partitionColumn=").append(partitionColumn).append(",partitionCount=").append(partitionCount)
+ .append(",primaryKeyColumns=").append(primaryKeyColumns).append("]");
+ return builder.toString();
+ }
+
+ public String getTableSchema() {
+ return tableSchema;
+ }
+
+ public void setTableSchema(String tableSchema) {
+ this.tableSchema = tableSchema;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
+ }
+
+ public List<ColumnInfo> getColumns() {
+ return columns;
+ }
+
+ public List<String> getColumnsNames() {
+ List<String> columnNames = new ArrayList<String>();
+ for (ColumnInfo column : this.getColumns()) {
+ columnNames.add(column.getName());
+ }
+ return columnNames;
+ }
+
+ public void setColumns(List<ColumnInfo> columns) {
+ this.columns = columns;
+ }
+
+ public String getComments() {
+ return comments;
+ }
+
+ public void setComments(String comments) {
+ this.comments = comments;
+ }
+
+ public String getTableType() {
+ return tableType;
+ }
+
+ public void setTableType(String tableType) {
+ this.tableType = tableType;
+ }
+
+ public String getUpdateType() {
+ return updateType;
+ }
+
+ public void setUpdateType(String updateType) {
+ this.updateType = updateType;
+ }
+
+ public String getPartitionType() {
+ return partitionType;
+ }
+
+ public void setPartitionType(String partitionType) {
+ this.partitionType = partitionType;
+ }
+
+ public String getPartitionColumn() {
+ return partitionColumn;
+ }
+
+ public void setPartitionColumn(String partitionColumn) {
+ this.partitionColumn = partitionColumn;
+ }
+
+ public int getPartitionCount() {
+ return partitionCount;
+ }
+
+ public void setPartitionCount(int partitionCount) {
+ this.partitionCount = partitionCount;
+ }
+
+ public List<String> getPrimaryKeyColumns() {
+ return primaryKeyColumns;
+ }
+
+ public void setPrimaryKeyColumns(List<String> primaryKeyColumns) {
+ this.primaryKeyColumns = primaryKeyColumns;
+ }
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/package-info.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/package-info.java
new file mode 100644
index 0000000000..b396c49ffa
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/ads/package-info.java
@@ -0,0 +1,6 @@
+/**
+ * ADS meta and service.
+ *
+ * @since 0.0.1
+ */
+package com.alibaba.datax.plugin.writer.adswriter.ads;
\ No newline at end of file
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertProxy.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertProxy.java
new file mode 100644
index 0000000000..7211fb9755
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertProxy.java
@@ -0,0 +1,631 @@
+package com.alibaba.datax.plugin.writer.adswriter.insert;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.plugin.TaskPluginCollector;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.RetryUtil;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
+import com.alibaba.datax.plugin.writer.adswriter.ads.TableInfo;
+import com.alibaba.datax.plugin.writer.adswriter.util.AdsUtil;
+import com.alibaba.datax.plugin.writer.adswriter.util.Constant;
+import com.alibaba.datax.plugin.writer.adswriter.util.Key;
+import com.mysql.jdbc.JDBC4PreparedStatement;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+
+public class AdsInsertProxy {
+
+ private static final Logger LOG = LoggerFactory
+ .getLogger(AdsInsertProxy.class);
+ private static final boolean IS_DEBUG_ENABLE = LOG.isDebugEnabled();
+ private static final int MAX_EXCEPTION_CAUSE_ITER = 100;
+
+ private String table;
+ private List<String> columns;
+ private TaskPluginCollector taskPluginCollector;
+ private Configuration configuration;
+ private Boolean emptyAsNull;
+
+ private String writeMode;
+
+ private String insertSqlPrefix;
+ private String deleteSqlPrefix;
+ private int opColumnIndex;
+ private String lastDmlMode;
+ // columnName : <column index, column type>
+ private Map<String, Pair<Integer, String>> adsTableColumnsMetaData;
+ private Map<String, Pair<Integer, String>> userConfigColumnsMetaData;
+ // columnName : index in the ads column list
+ private Map<String, Integer> primaryKeyNameIndexMap;
+
+ private int retryTimeUpperLimit;
+ private Connection currentConnection;
+
+ private String partitionColumn;
+ private int partitionColumnIndex = -1;
+ private int partitionCount;
+
+ public AdsInsertProxy(String table, List<String> columns, Configuration configuration, TaskPluginCollector taskPluginCollector, TableInfo tableInfo) {
+ this.table = table;
+ this.columns = columns;
+ this.configuration = configuration;
+ this.taskPluginCollector = taskPluginCollector;
+ this.emptyAsNull = configuration.getBool(Key.EMPTY_AS_NULL, false);
+ this.writeMode = configuration.getString(Key.WRITE_MODE);
+ this.insertSqlPrefix = String.format(Constant.INSERT_TEMPLATE, this.table, StringUtils.join(columns, ","));
+ this.deleteSqlPrefix = String.format(Constant.DELETE_TEMPLATE, this.table);
+ this.opColumnIndex = configuration.getInt(Key.OPIndex, 0);
+ this.retryTimeUpperLimit = configuration.getInt(
+ Key.RETRY_CONNECTION_TIME, Constant.DEFAULT_RETRY_TIMES);
+ this.partitionCount = tableInfo.getPartitionCount();
+ this.partitionColumn = tableInfo.getPartitionColumn();
+
+ //a newly created ads table with no data cannot expose column info via "select columns from table where 1=2", so read the ads data dictionary instead
+ //not this: this.resultSetMetaData = DBUtil.getColumnMetaData(connection, this.table, StringUtils.join(this.columns, ","));
+ //no retry here (fetch meta data); note that real-time tables may reorder columns
+ this.adsTableColumnsMetaData = AdsInsertUtil.getColumnMetaData(tableInfo, this.columns);
+ this.userConfigColumnsMetaData = new HashMap<String, Pair<Integer, String>>();
+
+ List<String> primaryKeyColumnName = tableInfo.getPrimaryKeyColumns();
+ List<String> adsColumnsNames = tableInfo.getColumnsNames();
+ this.primaryKeyNameIndexMap = new HashMap<String, Integer>();
+ //warn: use the column order configured by the user, not the column order from ads metadata; reusing the load column order was actually buggy
+ for (int i = 0; i < this.columns.size(); i++) {
+ String oriEachColumn = this.columns.get(i);
+ String eachColumn = oriEachColumn;
+ // defensively handle quoted reserved words
+ if (eachColumn.startsWith(Constant.ADS_QUOTE_CHARACTER) && eachColumn.endsWith(Constant.ADS_QUOTE_CHARACTER)) {
+ eachColumn = eachColumn.substring(1, eachColumn.length() - 1);
+ }
+ for (String eachPrimary : primaryKeyColumnName) {
+ if (eachColumn.equalsIgnoreCase(eachPrimary)) {
+ this.primaryKeyNameIndexMap.put(oriEachColumn, i);
+ }
+ }
+ for (String eachAdsColumn : adsColumnsNames) {
+ if (eachColumn.equalsIgnoreCase(eachAdsColumn)) {
+ this.userConfigColumnsMetaData.put(oriEachColumn, this.adsTableColumnsMetaData.get(eachAdsColumn));
+ }
+ }
+
+ // sort by the position of the partition column; ads real-time tables have a single-level partition with at most 256 partitions
+ if (eachColumn.equalsIgnoreCase(this.partitionColumn)) {
+ this.partitionColumnIndex = i;
+ }
+ }
+ }
+
+ public void startWriteWithConnection(RecordReceiver recordReceiver,
+ Connection connection,
+ int columnNumber) {
+ this.currentConnection = connection;
+ int batchSize = this.configuration.getInt(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_SIZE);
+ // by default bufferSize should equal batchSize
+ int bufferSize = this.configuration.getInt(Key.BUFFER_SIZE, batchSize);
+ // insert buffer: records of multiple partitions are sorted, merged and sent to ads
+ List<Record> writeBuffer = new ArrayList<Record>(bufferSize);
+ List<Record> deleteBuffer = null;
+ if (this.writeMode.equalsIgnoreCase(Constant.STREAMMODE)) {
+ // delete buffer: records of multiple partitions are sorted, merged and sent to ads
+ deleteBuffer = new ArrayList<Record>(bufferSize);
+ }
+ try {
+ Record record;
+ while ((record = recordReceiver.getFromReader()) != null) {
+ if (this.writeMode.equalsIgnoreCase(Constant.INSERTMODE)) {
+ if (record.getColumnNumber() != columnNumber) {
+ // the number of columns read from the source differs from the number written to the target; fail fast
+ throw DataXException
+ .asDataXException(
+ DBUtilErrorCode.CONF_ERROR,
+ String.format(
+ "列配置信息有错误. 因为您配置的任务中,源头读取字段数:%s 与 目的表要写入的字段数:%s 不相等. 请检查您的配置并作出修改.",
+ record.getColumnNumber(),
+ columnNumber));
+ }
+ writeBuffer.add(record);
+ if (writeBuffer.size() >= bufferSize) {
+ this.doBatchRecordWithPartitionSort(writeBuffer, Constant.INSERTMODE, bufferSize, batchSize);
+ writeBuffer.clear();
+ }
+ } else {
+ if (record.getColumnNumber() != columnNumber + 1) {
+ // the source must have exactly one extra column (the OP column) on top of the target columns; otherwise fail fast
+ throw DataXException
+ .asDataXException(
+ DBUtilErrorCode.CONF_ERROR,
+ String.format(
+ "列配置信息有错误. 因为您配置的任务中,源头读取字段数:%s 与 目的表要写入的字段数:%s 不满足源头多1列操作类型列. 请检查您的配置并作出修改.",
+ record.getColumnNumber(),
+ columnNumber));
+ }
+ String optionColumnValue = record.getColumn(this.opColumnIndex).asString();
+ OperationType operationType = OperationType.asOperationType(optionColumnValue);
+ if (operationType.isInsertTemplate()) {
+ writeBuffer.add(record);
+ if (this.lastDmlMode == null || this.lastDmlMode == Constant.INSERTMODE ) {
+ this.lastDmlMode = Constant.INSERTMODE;
+ if (writeBuffer.size() >= bufferSize) {
+ this.doBatchRecordWithPartitionSort(writeBuffer, Constant.INSERTMODE, bufferSize, batchSize);
+ writeBuffer.clear();
+ }
+ } else {
+ this.lastDmlMode = Constant.INSERTMODE;
+ // a mode switch triggers one ads delete submission, then we enter insert mode
+ this.doBatchRecordWithPartitionSort(deleteBuffer, Constant.DELETEMODE, bufferSize, batchSize);
+ deleteBuffer.clear();
+ }
+ } else if (operationType.isDeleteTemplate()) {
+ deleteBuffer.add(record);
+ if (this.lastDmlMode == null || this.lastDmlMode == Constant.DELETEMODE ) {
+ this.lastDmlMode = Constant.DELETEMODE;
+ if (deleteBuffer.size() >= bufferSize) {
+ this.doBatchRecordWithPartitionSort(deleteBuffer, Constant.DELETEMODE, bufferSize, batchSize);
+ deleteBuffer.clear();
+ }
+ } else {
+ this.lastDmlMode = Constant.DELETEMODE;
+ // a mode switch triggers one ads insert submission, then we enter delete mode
+ this.doBatchRecordWithPartitionSort(writeBuffer, Constant.INSERTMODE, bufferSize, batchSize);
+ writeBuffer.clear();
+ }
+ } else {
+ // dirty data with an unsupported OP type; no retry needed here
+ this.taskPluginCollector.collectDirtyRecord(record, String.format("不支持您的更新类型:%s", optionColumnValue));
+ }
+ }
+ }
+
+ if (!writeBuffer.isEmpty()) {
+ //doOneRecord(writeBuffer, Constant.INSERTMODE);
+ this.doBatchRecordWithPartitionSort(writeBuffer, Constant.INSERTMODE, bufferSize, batchSize);
+ writeBuffer.clear();
+ }
+ // at most one of the two buffers can be non-empty at this point
+ if (null!= deleteBuffer && !deleteBuffer.isEmpty()) {
+ //doOneRecord(deleteBuffer, Constant.DELETEMODE);
+ this.doBatchRecordWithPartitionSort(deleteBuffer, Constant.DELETEMODE, bufferSize, batchSize);
+ deleteBuffer.clear();
+ }
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ DBUtilErrorCode.WRITE_DATA_ERROR, e);
+ } finally {
+ writeBuffer.clear();
+ DBUtil.closeDBResources(null, null, connection);
+ }
+ }
+
+ /**
+ * @param bufferSize number of records datax buffers
+ * @param batchSize number of records datax sends to ads per batch
+ * @param buffer the datax record buffer
+ * @param mode real-time table mode: insert or stream
+ * */
+ private void doBatchRecordWithPartitionSort(List<Record> buffer, String mode, int bufferSize, int batchSize) throws SQLException {
+ //warn: sorting changes the insert order; if the source has no data constraints this may cause inconsistency, since quicksort is not a stable sort
+ //warn: do not sort when bufferSize is unset or not larger than batchSize; also skip sorting when the buffer holds fewer records than batchSize (the final remainder)
+ int recordBufferedNumber = buffer.size();
+ if (bufferSize > batchSize && recordBufferedNumber > batchSize && this.partitionColumnIndex >= 0) {
+ final int partitionColumnIndex = this.partitionColumnIndex;
+ final int partitionCount = this.partitionCount;
+ Collections.sort(buffer, new Comparator<Record>() {
+ @Override
+ public int compare(Record record1, Record record2) {
+ int hashPartition1 = AdsInsertProxy.getHashPartition(record1.getColumn(partitionColumnIndex).asString(), partitionCount);
+ int hashPartition2 = AdsInsertProxy.getHashPartition(record2.getColumn(partitionColumnIndex).asString(), partitionCount);
+ return hashPartition1 - hashPartition2;
+ }
+ });
+ }
+ // flush the buffered records to ads, bounded by recordBufferedNumber
+ for (int i = 0; i < recordBufferedNumber; i += batchSize) {
+ int toIndex = i + batchSize;
+ if (toIndex > recordBufferedNumber) {
+ toIndex = recordBufferedNumber;
+ }
+ this.doBatchRecord(buffer.subList(i, toIndex), mode);
+ }
+ }
+
+ private void doBatchRecord(final List<Record> buffer, final String mode) throws SQLException {
+ List<Class<?>> retryExceptionClasss = new ArrayList<Class<?>>();
+ retryExceptionClasss.add(com.mysql.jdbc.exceptions.jdbc4.CommunicationsException.class);
+ retryExceptionClasss.add(java.net.SocketException.class);
+ try {
+ RetryUtil.executeWithRetry(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ doBatchRecordDml(buffer, mode);
+ return true;
+ }
+ }, this.retryTimeUpperLimit, 2000L, true, retryExceptionClasss);
+ }catch (SQLException e) {
+ LOG.warn(String.format("after retry %s times, doBatchRecord meet a exception: ", this.retryTimeUpperLimit), e);
+ LOG.info("try to re execute for each record...");
+ doOneRecord(buffer, mode);
+ // below is the old way
+ // for (Record eachRecord : buffer) {
+ // this.taskPluginCollector.collectDirtyRecord(eachRecord, e);
+ // }
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ DBUtilErrorCode.WRITE_DATA_ERROR, e);
+ }
+ }
+
+ //warn: ADS does not support transactions, so rollback is of no use
+ @SuppressWarnings("resource")
+ private void doBatchRecordDml(List<Record> buffer, String mode) throws Exception {
+ Statement statement = null;
+ String sql = null;
+ try {
+ int bufferSize = buffer.size();
+ if (buffer.isEmpty()) {
+ return;
+ }
+ StringBuilder sqlSb = new StringBuilder();
+ // connection.setAutoCommit(true);
+ //mysql impl warn: if a database access error occurs or this method is called on a closed connection throw SQLException
+ statement = this.currentConnection.createStatement();
+ sqlSb.append(this.generateDmlSql(this.currentConnection, buffer.get(0), mode));
+ for (int i = 1; i < bufferSize; i++) {
+ Record record = buffer.get(i);
+ this.appendDmlSqlValues(this.currentConnection, record, sqlSb, mode);
+ }
+ sql = sqlSb.toString();
+ if (IS_DEBUG_ENABLE) {
+ LOG.debug(sql);
+ }
+ @SuppressWarnings("unused")
+ int status = statement.executeUpdate(sql);
+ sql = null;
+ } catch (SQLException e) {
+ LOG.warn("doBatchRecordDml meet a exception: " + sql, e);
+ Exception eachException = e;
+ int maxIter = 0;// 避免死循环
+ while (null != eachException && maxIter < AdsInsertProxy.MAX_EXCEPTION_CAUSE_ITER) {
+ if (this.isRetryable(eachException)) {
+ LOG.warn("doBatchRecordDml meet a retry exception: " + e.getMessage());
+ this.currentConnection = AdsUtil.getAdsConnect(this.configuration);
+ throw eachException;
+ } else {
+ try {
+ Throwable causeThrowable = eachException.getCause();
+ eachException = causeThrowable == null ? null : (Exception)causeThrowable;
+ } catch (Exception castException) {
+ LOG.warn("doBatchRecordDml meet a no! retry exception: " + e.getMessage());
+ throw e;
+ }
+ }
+ maxIter++;
+ }
+ throw e;
+ } catch (Exception e) {
+ LOG.error("插入异常, sql: " + sql);
+ throw DataXException.asDataXException(
+ DBUtilErrorCode.WRITE_DATA_ERROR, e);
+ } finally {
+ DBUtil.closeDBResources(statement, null);
+ }
+ }
+
+ private void doOneRecord(List<Record> buffer, final String mode) {
+ List<Class<?>> retryExceptionClasses = new ArrayList<Class<?>>();
+ retryExceptionClasses.add(com.mysql.jdbc.exceptions.jdbc4.CommunicationsException.class);
+ retryExceptionClasses.add(java.net.SocketException.class);
+ for (final Record record : buffer) {
+ try {
+ RetryUtil.executeWithRetry(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ doOneRecordDml(record, mode);
+ return true;
+ }
+ }, this.retryTimeUpperLimit, 2000L, true, retryExceptionClasses);
+ } catch (Exception e) {
+ // a row that cannot be retried: record it as dirty data
+ this.taskPluginCollector.collectDirtyRecord(record, e);
+ }
+ }
+ }
+
+ @SuppressWarnings("resource")
+ private void doOneRecordDml(Record record, String mode) throws Exception {
+ Statement statement = null;
+ String sql = null;
+ try {
+ // connection.setAutoCommit(true);
+ statement = this.currentConnection.createStatement();
+ sql = generateDmlSql(this.currentConnection, record, mode);
+ if (IS_DEBUG_ENABLE) {
+ LOG.debug(sql);
+ }
+ @SuppressWarnings("unused")
+ int status = statement.executeUpdate(sql);
+ sql = null;
+ } catch (SQLException e) {
+ LOG.error("doOneDml meet a exception: " + sql, e);
+ //need retry before record dirty data
+ //this.taskPluginCollector.collectDirtyRecord(record, e);
+ // 更新当前可用连接
+ Exception eachException = e;
+ int maxIter = 0;// guard against an infinite loop over the cause chain
+ while (null != eachException && maxIter < AdsInsertProxy.MAX_EXCEPTION_CAUSE_ITER) {
+ if (this.isRetryable(eachException)) {
+ LOG.warn("doOneDml meet a retry exception: " + e.getMessage());
+ this.currentConnection = AdsUtil.getAdsConnect(this.configuration);
+ throw eachException;
+ } else {
+ try {
+ Throwable causeThrowable = eachException.getCause();
+ eachException = causeThrowable == null ? null : (Exception)causeThrowable;
+ } catch (Exception castException) {
+ LOG.warn("doOneDml meet a no! retry exception: " + e.getMessage());
+ throw e;
+ }
+ }
+ maxIter++;
+ }
+ throw e;
+ } catch (Exception e) {
+ LOG.error("插入异常, sql: " + sql);
+ throw DataXException.asDataXException(
+ DBUtilErrorCode.WRITE_DATA_ERROR, e);
+ } finally {
+ DBUtil.closeDBResources(statement, null);
+ }
+ }
+
+ private boolean isRetryable(Throwable e) {
+ Class<?> meetExceptionClass = e.getClass();
+ if (meetExceptionClass == com.mysql.jdbc.exceptions.jdbc4.CommunicationsException.class) {
+ return true;
+ }
+ if (meetExceptionClass == java.net.SocketException.class) {
+ return true;
+ }
+ return false;
+ }
+
+ private String generateDmlSql(Connection connection, Record record, String mode) throws SQLException {
+ String sql = null;
+ StringBuilder sqlSb = new StringBuilder();
+ if (mode.equalsIgnoreCase(Constant.INSERTMODE)) {
+ sqlSb.append(this.insertSqlPrefix);
+ sqlSb.append("(");
+ int columnsSize = this.columns.size();
+ for (int i = 0; i < columnsSize; i++) {
+ if((i + 1) != columnsSize) {
+ sqlSb.append("?,");
+ } else {
+ sqlSb.append("?");
+ }
+ }
+ sqlSb.append(")");
+ //mysql impl warn: if a database access error occurs or this method is called on a closed connection
+ PreparedStatement statement = connection.prepareStatement(sqlSb.toString());
+ for (int i = 0; i < this.columns.size(); i++) {
+ int preparedParamsIndex = i;
+ if (Constant.STREAMMODE.equalsIgnoreCase(this.writeMode)) {
+ if (preparedParamsIndex >= this.opColumnIndex) {
+ preparedParamsIndex = i + 1;
+ }
+ }
+ String columnName = this.columns.get(i);
+ int columnSqltype = this.userConfigColumnsMetaData.get(columnName).getLeft();
+ prepareColumnTypeValue(statement, columnSqltype, record.getColumn(preparedParamsIndex), i, columnName);
+ }
+ sql = ((JDBC4PreparedStatement) statement).asSql();
+ DBUtil.closeDBResources(statement, null);
+ } else {
+ sqlSb.append(this.deleteSqlPrefix);
+ sqlSb.append("(");
+ Set<Entry<String, Integer>> primaryEntrySet = this.primaryKeyNameIndexMap.entrySet();
+ int entrySetSize = primaryEntrySet.size();
+ int i = 0;
+ for (Entry<String, Integer> eachEntry : primaryEntrySet) {
+ if((i + 1) != entrySetSize) {
+ sqlSb.append(String.format(" (%s = ?) and ", eachEntry.getKey()));
+ } else {
+ sqlSb.append(String.format(" (%s = ?) ", eachEntry.getKey()));
+ }
+ i++;
+ }
+ sqlSb.append(")");
+ //mysql impl warn: if a database access error occurs or this method is called on a closed connection
+ PreparedStatement statement = connection.prepareStatement(sqlSb.toString());
+ i = 0;
+ //ADS real-time tables support only one level of partitioning with a long-typed partition column, but deletion here must go through the primary key
+ for (Entry<String, Integer> each : primaryEntrySet) {
+ String columnName = each.getKey();
+ int columnSqlType = this.userConfigColumnsMetaData.get(columnName).getLeft();
+ int primaryKeyInUserConfigIndex = this.primaryKeyNameIndexMap.get(columnName);
+ if (primaryKeyInUserConfigIndex >= this.opColumnIndex) {
+ primaryKeyInUserConfigIndex ++;
+ }
+ prepareColumnTypeValue(statement, columnSqlType, record.getColumn(primaryKeyInUserConfigIndex), i, columnName);
+ i++;
+ }
+ sql = ((JDBC4PreparedStatement) statement).asSql();
+ DBUtil.closeDBResources(statement, null);
+ }
+ return sql;
+ }
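+ // Note: the asSql() calls above are specific to the com.mysql.jdbc driver's
+ // JDBC4PreparedStatement: the prepared statement is used only to render the bound
+ // parameters back into a literal SQL string and is closed without ever being executed.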
+
+ private void appendDmlSqlValues(Connection connection, Record record, StringBuilder sqlSb, String mode) throws SQLException {
+ String sqlResult = this.generateDmlSql(connection, record, mode);
+ if (mode.equalsIgnoreCase(Constant.INSERTMODE)) {
+ sqlSb.append(",");
+ sqlSb.append(sqlResult.substring(this.insertSqlPrefix.length()));
+ } else {
+ // parentheses were already added around each predicate group above
+ sqlSb.append(" or ");
+ sqlSb.append(sqlResult.substring(this.deleteSqlPrefix.length()));
+ }
+ }
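+ // Illustrative result (hypothetical table and columns): for two buffered records the
+ // assembled statement becomes a single multi-value DML, e.g. in insert mode
+ //   insert into t ( c1,c2 ) values ('a','b'),('c','d')
+ // while in delete mode the per-record predicate groups are OR-ed together:
+ //   delete from t where ( (pk = 1) ) or ( (pk = 2) )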
+
+ private void prepareColumnTypeValue(PreparedStatement statement, int columnSqltype, Column column, int preparedPatamIndex, String columnName) throws SQLException {
+ java.util.Date utilDate;
+ switch (columnSqltype) {
+ case Types.CHAR:
+ case Types.NCHAR:
+ case Types.CLOB:
+ case Types.NCLOB:
+ case Types.VARCHAR:
+ case Types.LONGVARCHAR:
+ case Types.NVARCHAR:
+ case Types.LONGNVARCHAR:
+ String strValue = column.asString();
+ statement.setString(preparedPatamIndex + 1, strValue);
+ break;
+
+ case Types.SMALLINT:
+ case Types.INTEGER:
+ case Types.BIGINT:
+ case Types.NUMERIC:
+ case Types.DECIMAL:
+ case Types.REAL:
+ String numValue = column.asString();
+ if(emptyAsNull && "".equals(numValue) || numValue == null){
+ //statement.setObject(preparedPatamIndex + 1, null);
+ statement.setNull(preparedPatamIndex + 1, Types.BIGINT);
+ } else{
+ statement.setLong(preparedPatamIndex + 1, column.asLong());
+ }
+ break;
+
+ case Types.FLOAT:
+ case Types.DOUBLE:
+ String floatValue = column.asString();
+ if(emptyAsNull && "".equals(floatValue) || floatValue == null){
+ //statement.setObject(preparedPatamIndex + 1, null);
+ statement.setNull(preparedPatamIndex + 1, Types.DOUBLE);
+ } else{
+ statement.setDouble(preparedPatamIndex + 1, column.asDouble());
+ }
+ break;
+
+ //tinyint is a little special in some database like mysql {boolean->tinyint(1)}
+ case Types.TINYINT:
+ Long longValue = column.asLong();
+ if (null == longValue) {
+ statement.setNull(preparedPatamIndex + 1, Types.BIGINT);
+ } else {
+ statement.setLong(preparedPatamIndex + 1, longValue);
+ }
+
+ break;
+
+ case Types.DATE:
+ java.sql.Date sqlDate = null;
+ try {
+ if("".equals(column.getRawData())) {
+ utilDate = null;
+ } else {
+ utilDate = column.asDate();
+ }
+ } catch (DataXException e) {
+ throw new SQLException(String.format(
+ "Date type conversion error: [%s]", column));
+ }
+
+ if (null != utilDate) {
+ sqlDate = new java.sql.Date(utilDate.getTime());
+ }
+ statement.setDate(preparedPatamIndex + 1, sqlDate);
+ break;
+
+ case Types.TIME:
+ java.sql.Time sqlTime = null;
+ try {
+ if("".equals(column.getRawData())) {
+ utilDate = null;
+ } else {
+ utilDate = column.asDate();
+ }
+ } catch (DataXException e) {
+ throw new SQLException(String.format(
+ "TIME type conversion error: [%s]", column));
+ }
+
+ if (null != utilDate) {
+ sqlTime = new java.sql.Time(utilDate.getTime());
+ }
+ statement.setTime(preparedPatamIndex + 1, sqlTime);
+ break;
+
+ case Types.TIMESTAMP:
+ java.sql.Timestamp sqlTimestamp = null;
+ try {
+ if("".equals(column.getRawData())) {
+ utilDate = null;
+ } else {
+ utilDate = column.asDate();
+ }
+ } catch (DataXException e) {
+ throw new SQLException(String.format(
+ "TIMESTAMP type conversion error: [%s]", column));
+ }
+
+ if (null != utilDate) {
+ sqlTimestamp = new java.sql.Timestamp(
+ utilDate.getTime());
+ }
+ statement.setTimestamp(preparedPatamIndex + 1, sqlTimestamp);
+ break;
+
+ case Types.BOOLEAN:
+ //case Types.BIT: ADS has no bit type
+ Boolean booleanValue = column.asBoolean();
+ if (null == booleanValue) {
+ statement.setNull(preparedPatamIndex + 1, Types.BOOLEAN);
+ } else {
+ statement.setBoolean(preparedPatamIndex + 1, booleanValue);
+ }
+
+ break;
+ default:
+ Pair<Integer, String> columnMetaPair = this.userConfigColumnsMetaData.get(columnName);
+ throw DataXException
+ .asDataXException(
+ DBUtilErrorCode.UNSUPPORTED_TYPE,
+ String.format(
+ "您的配置文件中的列配置信息有误. 因为DataX 不支持数据库写入这种字段类型. 字段名:[%s], 字段类型:[%s], 字段Java类型:[%s]. 请修改表中该字段的类型或者不同步该字段.",
+ columnName, columnMetaPair.getRight(), columnMetaPair.getLeft()));
+ }
+ }
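+ // Note: JDBC parameter indices are 1-based, which is why every setter above uses preparedPatamIndex + 1.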
+
+ private static int getHashPartition(String value, int totalHashPartitionNum) {
+ long crc32 = (value == null ? getCRC32("-1") : getCRC32(value));
+ return (int) (crc32 % totalHashPartitionNum);
+ }
+
+ private static long getCRC32(String value) {
+ Checksum checksum = new CRC32();
+ byte[] bytes = value.getBytes();
+ checksum.update(bytes, 0, bytes.length);
+ return checksum.getValue();
+ }
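+ // Usage sketch (illustrative): a record's hash partition is derived from the CRC32 of its
+ // partition-column string value, e.g.
+ //   int p = AdsInsertProxy.getHashPartition(record.getColumn(partitionColumnIndex).asString(), partitionCount);
+ // records sharing p belong to the same ADS hash partition, which is exactly what the
+ // pre-sort in doBatchRecordWithPartitionSort groups together before batching.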
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertUtil.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertUtil.java
new file mode 100644
index 0000000000..8e44e8c794
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertUtil.java
@@ -0,0 +1,153 @@
+package com.alibaba.datax.plugin.writer.adswriter.insert;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.ListUtil;
+import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
+import com.alibaba.datax.plugin.writer.adswriter.AdsException;
+import com.alibaba.datax.plugin.writer.adswriter.AdsWriterErrorCode;
+import com.alibaba.datax.plugin.writer.adswriter.ads.ColumnInfo;
+import com.alibaba.datax.plugin.writer.adswriter.ads.TableInfo;
+import com.alibaba.datax.plugin.writer.adswriter.load.AdsHelper;
+import com.alibaba.datax.plugin.writer.adswriter.util.AdsUtil;
+import com.alibaba.datax.plugin.writer.adswriter.util.Constant;
+import com.alibaba.datax.plugin.writer.adswriter.util.Key;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+public class AdsInsertUtil {
+
+ private static final Logger LOG = LoggerFactory
+ .getLogger(AdsInsertUtil.class);
+
+ public static TableInfo getAdsTableInfo(Configuration conf) {
+ AdsHelper adsHelper = AdsUtil.createAdsHelper(conf);
+ TableInfo tableInfo = null;
+ try {
+ tableInfo = adsHelper.getTableInfo(conf.getString(Key.ADS_TABLE));
+ } catch (AdsException e) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.GET_ADS_TABLE_MEATA_FAILED, e);
+ }
+ return tableInfo;
+ }
+
+ /*
+ * The returned column order matches the ADS table definition order.
+ * */
+ public static List<String> getAdsTableColumnNames(Configuration conf) {
+ List<String> tableColumns = new ArrayList<String>();
+ AdsHelper adsHelper = AdsUtil.createAdsHelper(conf);
+ TableInfo tableInfo = null;
+ String adsTable = conf.getString(Key.ADS_TABLE);
+ try {
+ tableInfo = adsHelper.getTableInfo(adsTable);
+ } catch (AdsException e) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.GET_ADS_TABLE_MEATA_FAILED, e);
+ }
+
+ List<ColumnInfo> columnInfos = tableInfo.getColumns();
+ for(ColumnInfo columnInfo: columnInfos) {
+ tableColumns.add(columnInfo.getName());
+ }
+
+ LOG.info("table:[{}] all columns:[\n{}\n].", adsTable, StringUtils.join(tableColumns, ","));
+ return tableColumns;
+ }
+
+ public static Map<String, Pair<Integer, String>> getColumnMetaData
+ (Configuration configuration, List<String> userColumns) {
+ Map<String, Pair<Integer, String>> columnMetaData = new HashMap<String, Pair<Integer, String>>();
+ List<ColumnInfo> columnInfoList = getAdsTableColumns(configuration);
+ for(String column : userColumns) {
+ if (column.startsWith(Constant.ADS_QUOTE_CHARACTER) && column.endsWith(Constant.ADS_QUOTE_CHARACTER)) {
+ column = column.substring(1, column.length() - 1);
+ }
+ for (ColumnInfo columnInfo : columnInfoList) {
+ if(column.equalsIgnoreCase(columnInfo.getName())) {
+ Pair<Integer, String> eachPair = new ImmutablePair<Integer, String>(columnInfo.getDataType().sqlType, columnInfo.getDataType().name);
+ columnMetaData.put(columnInfo.getName(), eachPair);
+ }
+ }
+ }
+ return columnMetaData;
+ }
+
+ public static Map<String, Pair<Integer, String>> getColumnMetaData(TableInfo tableInfo, List<String> userColumns){
+ Map<String, Pair<Integer, String>> columnMetaData = new HashMap<String, Pair<Integer, String>>();
+ List<ColumnInfo> columnInfoList = tableInfo.getColumns();
+ for(String column : userColumns) {
+ if (column.startsWith(Constant.ADS_QUOTE_CHARACTER) && column.endsWith(Constant.ADS_QUOTE_CHARACTER)) {
+ column = column.substring(1, column.length() - 1);
+ }
+ for (ColumnInfo columnInfo : columnInfoList) {
+ if(column.equalsIgnoreCase(columnInfo.getName())) {
+ Pair<Integer, String> eachPair = new ImmutablePair<Integer, String>(columnInfo.getDataType().sqlType, columnInfo.getDataType().name);
+ columnMetaData.put(columnInfo.getName(), eachPair);
+ }
+ }
+ }
+ return columnMetaData;
+ }
+
+ /*
+ * The returned column order matches the ADS table definition order.
+ * */
+ public static List<ColumnInfo> getAdsTableColumns(Configuration conf) {
+ AdsHelper adsHelper = AdsUtil.createAdsHelper(conf);
+ TableInfo tableInfo = null;
+ String adsTable = conf.getString(Key.ADS_TABLE);
+ try {
+ tableInfo = adsHelper.getTableInfo(adsTable);
+ } catch (AdsException e) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.GET_ADS_TABLE_MEATA_FAILED, e);
+ }
+
+ List<ColumnInfo> columnInfos = tableInfo.getColumns();
+
+ return columnInfos;
+ }
+
+ public static void dealColumnConf(Configuration originalConfig, List<String> tableColumns) {
+ List<String> userConfiguredColumns = originalConfig.getList(Key.COLUMN, String.class);
+ if (null == userConfiguredColumns || userConfiguredColumns.isEmpty()) {
+ throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE,
+ "Your column configuration is invalid: no columns were configured for the target table, so DataX cannot obtain column information. Please check and fix your configuration.");
+ } else {
+ if (1 == userConfiguredColumns.size() && "*".equals(userConfiguredColumns.get(0))) {
+ LOG.warn("您的配置文件中的列配置信息存在风险. 因为您配置的写入数据库表的列为*,当您的表字段个数、类型有变动时,可能影响任务正确性甚至会运行出错。请检查您的配置并作出修改.");
+
+ // backfill the value; it must be handed on as Strings for subsequent processing
+ originalConfig.set(Key.COLUMN, tableColumns);
+ } else if (userConfiguredColumns.size() > tableColumns.size()) {
+ throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE,
+ String.format("Your column configuration is invalid: the number of configured columns (%s) exceeds the total number of columns in the target table (%s). Please check and fix your configuration.",
+ userConfiguredColumns.size(), tableColumns.size()));
+ } else {
+ // make sure the user-configured columns contain no duplicates
+ ListUtil.makeSureNoValueDuplicate(userConfiguredColumns, false);
+ // verify every configured column really exists in the table (could be checked via a select column from table)
+ // ListUtil.makeSureBInA(tableColumns, userConfiguredColumns, true);
+ // support keywords and reserved words; ADS column names are case-insensitive
+ List<String> removeQuotedColumns = new ArrayList<String>();
+ for (String each : userConfiguredColumns) {
+ if (each.startsWith(Constant.ADS_QUOTE_CHARACTER) && each.endsWith(Constant.ADS_QUOTE_CHARACTER)) {
+ removeQuotedColumns.add(each.substring(1, each.length() - 1));
+ } else {
+ removeQuotedColumns.add(each);
+ }
+ }
+ ListUtil.makeSureBInA(tableColumns, removeQuotedColumns, false);
+ }
+ }
+ }
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/OperationType.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/OperationType.java
new file mode 100644
index 0000000000..a689e70327
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/OperationType.java
@@ -0,0 +1,75 @@
+package com.alibaba.datax.plugin.writer.adswriter.insert;
+
+public enum OperationType {
+ // i: insert, uo: update before-image, uu: update before-image, un: update after-image, d: delete, u: update
+ I("i"), UO("uo"), UU("uu"), UN("un"), D("d"), U("u"), UNKNOWN("unknown");
+ private OperationType(String type) {
+ this.type = type;
+ }
+
+ private String type;
+
+ public String getType() {
+ return this.type;
+ }
+
+ public static OperationType asOperationType(String type) {
+ if ("i".equalsIgnoreCase(type)) {
+ return I;
+ } else if ("uo".equalsIgnoreCase(type)) {
+ return UO;
+ } else if ("uu".equalsIgnoreCase(type)) {
+ return UU;
+ } else if ("un".equalsIgnoreCase(type)) {
+ return UN;
+ } else if ("d".equalsIgnoreCase(type)) {
+ return D;
+ } else if ("u".equalsIgnoreCase(type)) {
+ return U;
+ } else {
+ return UNKNOWN;
+ }
+ }
+
+ public boolean isInsertTemplate() {
+ switch (this) {
+ // after merging, only the I and U kinds are expected
+ case I:
+ case UO:
+ case UU:
+ case UN:
+ case U:
+ return true;
+ case D:
+ return false;
+ default:
+ return false;
+ }
+ }
+
+ public boolean isDeleteTemplate() {
+ switch (this) {
+ // after merging, only the I and U kinds are expected
+ case I:
+ case UO:
+ case UU:
+ case UN:
+ case U:
+ return false;
+ case D:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ public boolean isLegal() {
+ return this != UNKNOWN;
+ }
+
+ @Override
+ public String toString() {
+ return this.name();
+ }
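+ // Usage sketch (illustrative): in stream mode each record carries an op column, e.g.
+ //   OperationType op = OperationType.asOperationType(record.getColumn(opIndex).asString());
+ //   if (op.isInsertTemplate()) { /* buffer as insert */ } else if (op.isDeleteTemplate()) { /* buffer as delete */ }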
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/AdsHelper.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/AdsHelper.java
new file mode 100644
index 0000000000..924f6fcb61
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/AdsHelper.java
@@ -0,0 +1,429 @@
+package com.alibaba.datax.plugin.writer.adswriter.load;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.RetryUtil;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+import com.alibaba.datax.plugin.writer.adswriter.AdsException;
+import com.alibaba.datax.plugin.writer.adswriter.AdsWriterErrorCode;
+import com.alibaba.datax.plugin.writer.adswriter.ads.ColumnDataType;
+import com.alibaba.datax.plugin.writer.adswriter.ads.ColumnInfo;
+import com.alibaba.datax.plugin.writer.adswriter.ads.TableInfo;
+import com.alibaba.datax.plugin.writer.adswriter.util.AdsUtil;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+import java.util.concurrent.Callable;
+
+public class AdsHelper {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(AdsHelper.class);
+
+ private String adsURL;
+ private String userName;
+ private String password;
+ private String schema;
+ private Long socketTimeout;
+ private String suffix;
+
+ public AdsHelper(String adsUrl, String userName, String password, String schema, Long socketTimeout, String suffix) {
+ this.adsURL = adsUrl;
+ this.userName = userName;
+ this.password = password;
+ this.schema = schema;
+ this.socketTimeout = socketTimeout;
+ this.suffix = suffix;
+ }
+
+ public String getAdsURL() {
+ return adsURL;
+ }
+
+ public void setAdsURL(String adsURL) {
+ this.adsURL = adsURL;
+ }
+
+ public String getUserName() {
+ return userName;
+ }
+
+ public void setUserName(String userName) {
+ this.userName = userName;
+ }
+
+ public String getPassword() {
+ return password;
+ }
+
+ public void setPassword(String password) {
+ this.password = password;
+ }
+
+ public String getSchema() {
+ return schema;
+ }
+
+ public void setSchema(String schema) {
+ this.schema = schema;
+ }
+
+ /**
+ * Obtain the table meta information.
+ *
+ * @param table The table
+ * @return The table meta information
+ * @throws com.alibaba.datax.plugin.writer.adswriter.AdsException
+ */
+ public TableInfo getTableInfo(String table) throws AdsException {
+
+ if (table == null) {
+ throw new AdsException(AdsException.ADS_TABLEMETA_TABLE_NULL, "Table is null.", null);
+ }
+
+ if (adsURL == null) {
+ throw new AdsException(AdsException.ADS_CONN_URL_NOT_SET, "ADS JDBC connection URL was not set.", null);
+ }
+
+ if (userName == null) {
+ throw new AdsException(AdsException.ADS_CONN_USERNAME_NOT_SET,
+ "ADS JDBC connection user name was not set.", null);
+ }
+
+ if (password == null) {
+ throw new AdsException(AdsException.ADS_CONN_PASSWORD_NOT_SET, "ADS JDBC connection password was not set.",
+ null);
+ }
+
+ if (schema == null) {
+ throw new AdsException(AdsException.ADS_CONN_SCHEMA_NOT_SET, "ADS JDBC connection schema was not set.",
+ null);
+ }
+
+ Connection connection = null;
+ Statement statement = null;
+ ResultSet rs = null;
+ try {
+ Class.forName("com.mysql.jdbc.Driver");
+ String url = AdsUtil.prepareJdbcUrl(this.adsURL, this.schema, this.socketTimeout, this.suffix);
+
+ Properties connectionProps = new Properties();
+ connectionProps.put("user", userName);
+ connectionProps.put("password", password);
+ connection = DriverManager.getConnection(url, connectionProps);
+ statement = connection.createStatement();
+ // ADS table and schema names are case-insensitive; lower-case both to be user-friendly, and order by ordinal position to preserve the column order
+ String columnMetaSql = String.format("select ordinal_position,column_name,data_type,type_name,column_comment from information_schema.columns where lower(table_schema) = '%s' and lower(table_name) = '%s' order by ordinal_position", schema.toLowerCase(), table.toLowerCase());
+ LOG.info(String.format("column meta sql: %s", columnMetaSql));
+ rs = statement.executeQuery(columnMetaSql);
+
+ TableInfo tableInfo = new TableInfo();
+ List columnInfoList = new ArrayList();
+ while (DBUtil.asyncResultSetNext(rs)) {
+ ColumnInfo columnInfo = new ColumnInfo();
+ columnInfo.setOrdinal(rs.getInt(1));
+ columnInfo.setName(rs.getString(2));
+ //columnInfo.setDataType(ColumnDataType.getDataType(rs.getInt(3))); //for ads version < 0.7
+ //columnInfo.setDataType(ColumnDataType.getTypeByName(rs.getString(3).toUpperCase())); //for ads version 0.8
+ columnInfo.setDataType(ColumnDataType.getTypeByName(rs.getString(4).toUpperCase())); //for ads version 0.8 & 0.7
+ columnInfo.setComment(rs.getString(5));
+ columnInfoList.add(columnInfo);
+ }
+ if (columnInfoList.isEmpty()) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.NO_ADS_TABLE, table + " does not exist or its column information cannot be queried.");
+ }
+ tableInfo.setColumns(columnInfoList);
+ tableInfo.setTableSchema(schema);
+ tableInfo.setTableName(table);
+ DBUtil.closeDBResources(rs, statement, null);
+
+ String tableMetaSql = String.format("select update_type, partition_type, partition_column, partition_count, primary_key_columns from information_schema.tables where lower(table_schema) = `'%s'` and lower(table_name) = `'%s'`", schema.toLowerCase(), table.toLowerCase());
+ LOG.info(String.format("检查表信息sql语句:%s", tableMetaSql));
+ statement = connection.createStatement();
+ rs = statement.executeQuery(tableMetaSql);
+ while (DBUtil.asyncResultSetNext(rs)) {
+ tableInfo.setUpdateType(rs.getString(1));
+ tableInfo.setPartitionType(rs.getString(2));
+ tableInfo.setPartitionColumn(rs.getString(3));
+ tableInfo.setPartitionCount(rs.getInt(4));
+ //primary_key_columns: the ADS primary key is comma-separated and may contain multiple columns
+ String primaryKeyColumns = rs.getString(5);
+ if (StringUtils.isNotBlank(primaryKeyColumns)) {
+ tableInfo.setPrimaryKeyColumns(Arrays.asList(StringUtils.split(primaryKeyColumns, ",")));
+ } else {
+ tableInfo.setPrimaryKeyColumns(null);
+ }
+ break;
+ }
+ DBUtil.closeDBResources(rs, statement, null);
+ return tableInfo;
+
+ } catch (ClassNotFoundException e) {
+ throw new AdsException(AdsException.OTHER, e.getMessage(), e);
+ } catch (SQLException e) {
+ throw new AdsException(AdsException.OTHER, e.getMessage(), e);
+ } catch (DataXException e) {
+ throw e;
+ } catch (Exception e) {
+ throw new AdsException(AdsException.OTHER, e.getMessage(), e);
+ } finally {
+ if (rs != null) {
+ try {
+ rs.close();
+ } catch (SQLException e) {
+ // Ignore exception
+ }
+ }
+ if (statement != null) {
+ try {
+ statement.close();
+ } catch (SQLException e) {
+ // Ignore exception
+ }
+ }
+ if (connection != null) {
+ try {
+ connection.close();
+ } catch (SQLException e) {
+ // Ignore exception
+ }
+ }
+ }
+
+ }
+
+ /**
+ * Submit LOAD DATA command.
+ *
+ * @param table The target ADS table
+ * @param partition The partition option in the form of "(partition_name,...)"
+ * @param sourcePath The source path
+ * @param overwrite whether to overwrite existing data in the target table
+ * @return the LOAD DATA job id
+ * @throws AdsException
+ */
+ public String loadData(String table, String partition, String sourcePath, boolean overwrite) throws AdsException {
+
+ if (table == null) {
+ throw new AdsException(AdsException.ADS_LOADDATA_TABLE_NULL, "ADS LOAD DATA table is null.", null);
+ }
+
+ if (sourcePath == null) {
+ throw new AdsException(AdsException.ADS_LOADDATA_SOURCEPATH_NULL, "ADS LOAD DATA source path is null.",
+ null);
+ }
+
+ if (adsURL == null) {
+ throw new AdsException(AdsException.ADS_CONN_URL_NOT_SET, "ADS JDBC connection URL was not set.", null);
+ }
+
+ if (userName == null) {
+ throw new AdsException(AdsException.ADS_CONN_USERNAME_NOT_SET,
+ "ADS JDBC connection user name was not set.", null);
+ }
+
+ if (password == null) {
+ throw new AdsException(AdsException.ADS_CONN_PASSWORD_NOT_SET, "ADS JDBC connection password was not set.",
+ null);
+ }
+
+ if (schema == null) {
+ throw new AdsException(AdsException.ADS_CONN_SCHEMA_NOT_SET, "ADS JDBC connection schema was not set.",
+ null);
+ }
+
+ StringBuilder sb = new StringBuilder();
+ sb.append("LOAD DATA FROM ");
+ if (sourcePath.startsWith("'") && sourcePath.endsWith("'")) {
+ sb.append(sourcePath);
+ } else {
+ sb.append("'" + sourcePath + "'");
+ }
+ if (overwrite) {
+ sb.append(" OVERWRITE");
+ }
+ sb.append(" INTO TABLE ");
+ sb.append(schema + "." + table);
+ if (partition != null && !partition.trim().equals("")) {
+ String partitionTrim = partition.trim();
+ if(partitionTrim.startsWith("(") && partitionTrim.endsWith(")")) {
+ sb.append(" PARTITION " + partition);
+ } else {
+ sb.append(" PARTITION " + "(" + partition + ")");
+ }
+ }
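+ // Illustrative resulting statement (hypothetical names):
+ //   LOAD DATA FROM 'odps://my_project/tmp_t/pt=20150101' OVERWRITE INTO TABLE my_schema.my_table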
+
+ Connection connection = null;
+ Statement statement = null;
+ ResultSet rs = null;
+ try {
+ Class.forName("com.mysql.jdbc.Driver");
+ String url = AdsUtil.prepareJdbcUrl(this.adsURL, this.schema, this.socketTimeout, this.suffix);
+ Properties connectionProps = new Properties();
+ connectionProps.put("user", userName);
+ connectionProps.put("password", password);
+ connection = DriverManager.getConnection(url, connectionProps);
+ statement = connection.createStatement();
+ LOG.info("正在从ODPS数据库导数据到ADS中: "+sb.toString());
+ LOG.info("由于ADS的限制,ADS导数据最少需要20分钟,请耐心等待");
+ rs = statement.executeQuery(sb.toString());
+
+ String jobId = null;
+ while (DBUtil.asyncResultSetNext(rs)) {
+ jobId = rs.getString(1);
+ }
+
+ if (jobId == null) {
+ throw new AdsException(AdsException.ADS_LOADDATA_JOBID_NOT_AVAIL,
+ "Job id is not available for the submitted LOAD DATA.", null);
+ }
+
+ return jobId;
+
+ } catch (ClassNotFoundException e) {
+ throw new AdsException(AdsException.ADS_LOADDATA_FAILED, e.getMessage(), e);
+ } catch (SQLException e) {
+ throw new AdsException(AdsException.ADS_LOADDATA_FAILED, e.getMessage(), e);
+ } catch (Exception e) {
+ throw new AdsException(AdsException.ADS_LOADDATA_FAILED, e.getMessage(), e);
+ } finally {
+ if (rs != null) {
+ try {
+ rs.close();
+ } catch (SQLException e) {
+ // Ignore exception
+ }
+ }
+ if (statement != null) {
+ try {
+ statement.close();
+ } catch (SQLException e) {
+ // Ignore exception
+ }
+ }
+ if (connection != null) {
+ try {
+ connection.close();
+ } catch (SQLException e) {
+ // Ignore exception
+ }
+ }
+ }
+
+ }
+
+ /**
+ * Check the load data job status.
+ *
+ * @param jobId The job id to check
+ * @return true if load data job succeeded, false if load data job failed.
+ * @throws AdsException
+ */
+ public boolean checkLoadDataJobStatus(String jobId) throws AdsException {
+
+ if (adsURL == null) {
+ throw new AdsException(AdsException.ADS_CONN_URL_NOT_SET, "ADS JDBC connection URL was not set.", null);
+ }
+
+ if (userName == null) {
+ throw new AdsException(AdsException.ADS_CONN_USERNAME_NOT_SET,
+ "ADS JDBC connection user name was not set.", null);
+ }
+
+ if (password == null) {
+ throw new AdsException(AdsException.ADS_CONN_PASSWORD_NOT_SET, "ADS JDBC connection password was not set.",
+ null);
+ }
+
+ if (schema == null) {
+ throw new AdsException(AdsException.ADS_CONN_SCHEMA_NOT_SET, "ADS JDBC connection schema was not set.",
+ null);
+ }
+
+ try {
+ String state = this.checkLoadDataJobStatusWithRetry(jobId);
+ if (state == null) {
+ throw new AdsException(AdsException.JOB_NOT_EXIST, "Target job does not exist for id: " + jobId, null);
+ }
+ if (state.equals("SUCCEEDED")) {
+ return true;
+ } else if (state.equals("FAILED")) {
+ throw new AdsException(AdsException.JOB_FAILED, "Target job failed for id: " + jobId, null);
+ } else {
+ return false;
+ }
+ } catch (Exception e) {
+ throw new AdsException(AdsException.OTHER, e.getMessage(), e);
+ }
+ }
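+ // Polling sketch (illustrative): callers typically invoke this in a loop until the job
+ // finishes, e.g.
+ //   while (!adsHelper.checkLoadDataJobStatus(jobId)) { Thread.sleep(checkIntervalMs); }
+ // a FAILED job surfaces as an AdsException rather than as a false return value.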
+
+ private String checkLoadDataJobStatusWithRetry(final String jobId)
+ throws AdsException {
+ try {
+ Class.forName("com.mysql.jdbc.Driver");
+ final String finalAdsUrl = this.adsURL;
+ final String finalSchema = this.schema;
+ final Long finalSocketTimeout = this.socketTimeout;
+ final String suffix = this.suffix;
+ return RetryUtil.executeWithRetry(new Callable<String>() {
+ @Override
+ public String call() throws Exception {
+ Connection connection = null;
+ Statement statement = null;
+ ResultSet rs = null;
+ try {
+
+ String url = AdsUtil.prepareJdbcUrl(finalAdsUrl, finalSchema, finalSocketTimeout, suffix);
+ Properties connectionProps = new Properties();
+ connectionProps.put("user", userName);
+ connectionProps.put("password", password);
+ connection = DriverManager.getConnection(url,
+ connectionProps);
+ statement = connection.createStatement();
+
+ String sql = "select state from information_schema.job_instances where job_id like '"
+ + jobId + "'";
+ rs = statement.executeQuery(sql);
+ String state = null;
+ while (DBUtil.asyncResultSetNext(rs)) {
+ state = rs.getString(1);
+ }
+ return state;
+ } finally {
+ if (rs != null) {
+ try {
+ rs.close();
+ } catch (SQLException e) {
+ // Ignore exception
+ }
+ }
+ if (statement != null) {
+ try {
+ statement.close();
+ } catch (SQLException e) {
+ // Ignore exception
+ }
+ }
+ if (connection != null) {
+ try {
+ connection.close();
+ } catch (SQLException e) {
+ // Ignore exception
+ }
+ }
+ }
+ }
+ }, 3, 1000L, true);
+ } catch (Exception e) {
+ throw new AdsException(AdsException.OTHER, e.getMessage(), e);
+ }
+ }
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TableMetaHelper.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TableMetaHelper.java
new file mode 100644
index 0000000000..1ecad7561d
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TableMetaHelper.java
@@ -0,0 +1,87 @@
+package com.alibaba.datax.plugin.writer.adswriter.load;
+
+import com.alibaba.datax.plugin.writer.adswriter.ads.ColumnDataType;
+import com.alibaba.datax.plugin.writer.adswriter.ads.ColumnInfo;
+import com.alibaba.datax.plugin.writer.adswriter.ads.TableInfo;
+import com.alibaba.datax.plugin.writer.adswriter.odps.DataType;
+import com.alibaba.datax.plugin.writer.adswriter.odps.FieldSchema;
+import com.alibaba.datax.plugin.writer.adswriter.odps.TableMeta;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * Table meta helper for ADS writer.
+ *
+ * @since 0.0.1
+ */
+public class TableMetaHelper {
+
+ private TableMetaHelper() {
+ }
+
+ /**
+ * Create temporary ODPS table.
+ *
+ * @param tableMeta table meta
+ * @param lifeCycle for temporary table
+ * @return ODPS temporary table meta
+ */
+ public static TableMeta createTempODPSTable(TableInfo tableMeta, int lifeCycle) {
+ TableMeta tempTable = new TableMeta();
+ tempTable.setComment(tableMeta.getComments());
+ tempTable.setLifeCycle(lifeCycle);
+ String tableSchema = tableMeta.getTableSchema();
+ String tableName = tableMeta.getTableName();
+ tempTable.setTableName(generateTempTableName(tableSchema, tableName));
+ List<FieldSchema> tempColumns = new ArrayList<FieldSchema>();
+ List<ColumnInfo> columns = tableMeta.getColumns();
+ for (ColumnInfo column : columns) {
+ FieldSchema tempColumn = new FieldSchema();
+ tempColumn.setName(column.getName());
+ tempColumn.setType(toODPSDataType(column.getDataType()));
+ tempColumn.setComment(column.getComment());
+ tempColumns.add(tempColumn);
+ }
+ tempTable.setCols(tempColumns);
+ tempTable.setPartitionKeys(null);
+ return tempTable;
+ }
+
+ private static String toODPSDataType(ColumnDataType columnDataType) {
+ int type;
+ switch (columnDataType.type) {
+ case ColumnDataType.BOOLEAN:
+ type = DataType.STRING;
+ break;
+ case ColumnDataType.BYTE:
+ case ColumnDataType.SHORT:
+ case ColumnDataType.INT:
+ case ColumnDataType.LONG:
+ type = DataType.INTEGER;
+ break;
+ case ColumnDataType.DECIMAL:
+ case ColumnDataType.DOUBLE:
+ case ColumnDataType.FLOAT:
+ type = DataType.DOUBLE;
+ break;
+ case ColumnDataType.DATE:
+ case ColumnDataType.TIME:
+ case ColumnDataType.TIMESTAMP:
+ case ColumnDataType.STRING:
+ case ColumnDataType.MULTI_VALUE:
+ type = DataType.STRING;
+ break;
+ default:
+ throw new IllegalArgumentException("columnDataType=" + columnDataType);
+ }
+ return DataType.toString(type);
+ }
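+ // Illustrative mapping: an ADS LONG column becomes ODPS "bigint", DECIMAL/DOUBLE/FLOAT
+ // become "double", and BOOLEAN plus all date/time and string-like types degrade to
+ // "string" in the temporary table.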
+
+ private static String generateTempTableName(String tableSchema, String tableName) {
+ int randNum = 1000 + new Random(System.currentTimeMillis()).nextInt(1000);
+ return tableSchema + "__" + tableName + "_" + System.currentTimeMillis() + randNum;
+ }
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TransferProjectConf.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TransferProjectConf.java
new file mode 100644
index 0000000000..bff4b7b900
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TransferProjectConf.java
@@ -0,0 +1,65 @@
+package com.alibaba.datax.plugin.writer.adswriter.load;
+
+import com.alibaba.datax.common.util.Configuration;
+
+/**
+ * Created by xiafei.qiuxf on 15/4/13.
+ */
+public class TransferProjectConf {
+
+ public final static String KEY_ACCESS_ID = "odps.accessId";
+ public final static String KEY_ACCESS_KEY = "odps.accessKey";
+ public final static String KEY_ACCOUNT = "odps.account";
+ public final static String KEY_ODPS_SERVER = "odps.odpsServer";
+ public final static String KEY_ODPS_TUNNEL = "odps.tunnelServer";
+ public final static String KEY_ACCOUNT_TYPE = "odps.accountType";
+ public final static String KEY_PROJECT = "odps.project";
+
+ private String accessId;
+ private String accessKey;
+ private String account;
+ private String odpsServer;
+ private String odpsTunnel;
+ private String accountType;
+ private String project;
+
+ public static TransferProjectConf create(Configuration adsWriterConf) {
+ TransferProjectConf res = new TransferProjectConf();
+ res.accessId = adsWriterConf.getString(KEY_ACCESS_ID);
+ res.accessKey = adsWriterConf.getString(KEY_ACCESS_KEY);
+ res.account = adsWriterConf.getString(KEY_ACCOUNT);
+ res.odpsServer = adsWriterConf.getString(KEY_ODPS_SERVER);
+ res.odpsTunnel = adsWriterConf.getString(KEY_ODPS_TUNNEL);
+ res.accountType = adsWriterConf.getString(KEY_ACCOUNT_TYPE, "aliyun");
+ res.project = adsWriterConf.getString(KEY_PROJECT);
+ return res;
+ }
+
+ public String getAccessId() {
+ return accessId;
+ }
+
+ public String getAccessKey() {
+ return accessKey;
+ }
+
+ public String getAccount() {
+ return account;
+ }
+
+ public String getOdpsServer() {
+ return odpsServer;
+ }
+
+ public String getOdpsTunnel() {
+ return odpsTunnel;
+ }
+
+ public String getAccountType() {
+ return accountType;
+ }
+
+ public String getProject() {
+ return project;
+ }
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/DataType.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/DataType.java
new file mode 100644
index 0000000000..595b1dfd26
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/DataType.java
@@ -0,0 +1,77 @@
+package com.alibaba.datax.plugin.writer.adswriter.odps;
+
+/**
+ * ODPS data types.
+ *
+ * The following types are currently defined:
+ *
+ * INTEGER
+ * DOUBLE
+ * BOOLEAN
+ * STRING
+ * DATETIME
+ *
+ *
+ *
+ * @since 0.0.1
+ */
+public class DataType {
+
+ public final static byte INTEGER = 0;
+ public final static byte DOUBLE = 1;
+ public final static byte BOOLEAN = 2;
+ public final static byte STRING = 3;
+ public final static byte DATETIME = 4;
+
+ public static String toString(int type) {
+ switch (type) {
+ case INTEGER:
+ return "bigint";
+ case DOUBLE:
+ return "double";
+ case BOOLEAN:
+ return "boolean";
+ case STRING:
+ return "string";
+ case DATETIME:
+ return "datetime";
+ default:
+ throw new IllegalArgumentException("type=" + type);
+ }
+ }
+
+ /**
+ * Converts a type-name string to the data type defined by the byte constants.
+ *
+ * Conversion rules:
+ *
+ * tinyint, int, bigint, long - {@link #INTEGER}
+ * double, float - {@link #DOUBLE}
+ * string - {@link #STRING}
+ * boolean, bool - {@link #BOOLEAN}
+ * datetime - {@link #DATETIME}
+ *
+ *
+ *
+ * @param type the type-name string
+ * @return the data type as a byte constant
+ * @throws IllegalArgumentException
+ */
+ public static byte convertToDataType(String type) throws IllegalArgumentException {
+ type = type.toLowerCase().trim();
+ if ("string".equals(type)) {
+ return STRING;
+ } else if ("bigint".equals(type) || "int".equals(type) || "tinyint".equals(type) || "long".equals(type)) {
+ return INTEGER;
+ } else if ("boolean".equals(type) || "bool".equals(type)) {
+ return BOOLEAN;
+ } else if ("double".equals(type) || "float".equals(type)) {
+ return DOUBLE;
+ } else if ("datetime".equals(type)) {
+ return DATETIME;
+ } else {
+ throw new IllegalArgumentException("unkown type: " + type);
+ }
+ }
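+ // Illustrative example: convertToDataType("BigInt ") returns INTEGER (the input is trimmed
+ // and lower-cased first), while toString(INTEGER) renders it back as "bigint" for DDL generation.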
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/FieldSchema.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/FieldSchema.java
new file mode 100644
index 0000000000..701ee261cf
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/FieldSchema.java
@@ -0,0 +1,63 @@
+package com.alibaba.datax.plugin.writer.adswriter.odps;
+
+/**
+ * ODPS column attribute, containing the column name and type; both match what SQL DESC shows for a table or partition.
+ *
+ * @since 0.0.1
+ */
+public class FieldSchema {
+
+ /** column name */
+ private String name;
+
+ /** column type, e.g. string, bigint, boolean, datetime */
+ private String type;
+
+ private String comment;
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ public void setType(String type) {
+ this.type = type;
+ }
+
+ public String getComment() {
+ return comment;
+ }
+
+ public void setComment(String comment) {
+ this.comment = comment;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ builder.append("FieldSchema [name=").append(name).append(", type=").append(type).append(", comment=")
+ .append(comment).append("]");
+ return builder.toString();
+ }
+
+ /**
+ * @return "col_name data_type [COMMENT col_comment]"
+ */
+ public String toDDL() {
+ StringBuilder builder = new StringBuilder();
+ builder.append(name).append(" ").append(type);
+ String comment = this.comment;
+ if (comment != null && comment.length() > 0) {
+ builder.append(" ").append("COMMENT \"" + comment + "\"");
+ }
+ return builder.toString();
+ }
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/TableMeta.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/TableMeta.java
new file mode 100644
index 0000000000..d0adc4eae6
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/TableMeta.java
@@ -0,0 +1,114 @@
+package com.alibaba.datax.plugin.writer.adswriter.odps;
+
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * ODPS table meta.
+ *
+ * @since 0.0.1
+ */
+public class TableMeta {
+
+ private String tableName;
+
+ private List<FieldSchema> cols;
+
+ private List<FieldSchema> partitionKeys;
+
+ private int lifeCycle;
+
+ private String comment;
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
+ }
+
+ public List<FieldSchema> getCols() {
+ return cols;
+ }
+
+ public void setCols(List<FieldSchema> cols) {
+ this.cols = cols;
+ }
+
+ public List<FieldSchema> getPartitionKeys() {
+ return partitionKeys;
+ }
+
+ public void setPartitionKeys(List<FieldSchema> partitionKeys) {
+ this.partitionKeys = partitionKeys;
+ }
+
+ public int getLifeCycle() {
+ return lifeCycle;
+ }
+
+ public void setLifeCycle(int lifeCycle) {
+ this.lifeCycle = lifeCycle;
+ }
+
+ public String getComment() {
+ return comment;
+ }
+
+ public void setComment(String comment) {
+ this.comment = comment;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ builder.append("TableMeta [tableName=").append(tableName).append(", cols=").append(cols)
+ .append(", partitionKeys=").append(partitionKeys).append(", lifeCycle=").append(lifeCycle)
+ .append(", comment=").append(comment).append("]");
+ return builder.toString();
+ }
+
+ /**
+ * @return
+ * "CREATE TABLE [IF NOT EXISTS] table_name
+ * [(col_name data_type [COMMENT col_comment], ...)]
+ * [COMMENT table_comment]
+ * [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
+ * [LIFECYCLE days]
+ * [AS select_statement] "
+ */
+ public String toDDL() {
+ StringBuilder builder = new StringBuilder();
+ builder.append("CREATE TABLE " + tableName).append(" ");
+ List<FieldSchema> cols = this.cols;
+ if (cols != null && cols.size() > 0) {
+ builder.append("(").append(toDDL(cols)).append(")").append(" ");
+ }
+ String comment = this.comment;
+ if (comment != null && comment.length() > 0) {
+ builder.append("COMMENT \"" + comment + "\" ");
+ }
+ List<FieldSchema> partitionKeys = this.partitionKeys;
+ if (partitionKeys != null && partitionKeys.size() > 0) {
+ builder.append("PARTITIONED BY ");
+ builder.append("(").append(toDDL(partitionKeys)).append(")").append(" ");
+ }
+ if (lifeCycle > 0) {
+ builder.append("LIFECYCLE " + lifeCycle).append(" ");
+ }
+ builder.append(";");
+ return builder.toString();
+ }
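+ // Illustrative output (hypothetical schema):
+ //   CREATE TABLE tmp_t (id bigint, name string COMMENT "user name") COMMENT "temp" LIFECYCLE 2 ;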
+
+ private String toDDL(List<FieldSchema> cols) {
+ StringBuilder builder = new StringBuilder();
+ Iterator<FieldSchema> iter = cols.iterator();
+ builder.append(iter.next().toDDL());
+ while (iter.hasNext()) {
+ builder.append(", ").append(iter.next().toDDL());
+ }
+ return builder.toString();
+ }
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/package-info.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/package-info.java
new file mode 100644
index 0000000000..92dfd09da4
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/package-info.java
@@ -0,0 +1,6 @@
+/**
+ * ODPS meta.
+ *
+ * @since 0.0.1
+ */
+package com.alibaba.datax.plugin.writer.adswriter.odps;
\ No newline at end of file
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/package-info.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/package-info.java
new file mode 100644
index 0000000000..139a39106a
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/package-info.java
@@ -0,0 +1,6 @@
+/**
+ * ADS Writer.
+ *
+ * @since 0.0.1
+ */
+package com.alibaba.datax.plugin.writer.adswriter;
\ No newline at end of file
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/AdsUtil.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/AdsUtil.java
new file mode 100644
index 0000000000..4336d4773f
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/AdsUtil.java
@@ -0,0 +1,175 @@
+package com.alibaba.datax.plugin.writer.adswriter.util;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.alibaba.datax.plugin.writer.adswriter.load.AdsHelper;
+import com.alibaba.datax.plugin.writer.adswriter.AdsWriterErrorCode;
+import com.alibaba.datax.plugin.writer.adswriter.load.TransferProjectConf;
+import com.alibaba.datax.plugin.writer.adswriter.odps.FieldSchema;
+import com.alibaba.datax.plugin.writer.adswriter.odps.TableMeta;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.util.ArrayList;
+import java.util.List;
+
+public class AdsUtil {
+ private static final Logger LOG = LoggerFactory.getLogger(AdsUtil.class);
+
+ /* Check that all required configuration items are filled in.
+ * */
+ public static void checkNecessaryConfig(Configuration originalConfig, String writeMode) {
+ //check required ADS parameters
+ originalConfig.getNecessaryValue(Key.ADS_URL,
+ AdsWriterErrorCode.REQUIRED_VALUE);
+ originalConfig.getNecessaryValue(Key.USERNAME,
+ AdsWriterErrorCode.REQUIRED_VALUE);
+ originalConfig.getNecessaryValue(Key.PASSWORD,
+ AdsWriterErrorCode.REQUIRED_VALUE);
+ originalConfig.getNecessaryValue(Key.SCHEMA,
+ AdsWriterErrorCode.REQUIRED_VALUE);
+ if(Constant.LOADMODE.equals(writeMode)) {
+ originalConfig.getNecessaryValue(Key.Life_CYCLE,
+ AdsWriterErrorCode.REQUIRED_VALUE);
+ Integer lifeCycle = originalConfig.getInt(Key.Life_CYCLE);
+ if (lifeCycle <= 0) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.INVALID_CONFIG_VALUE, "the value of [lifeCycle] must be greater than zero.");
+ }
+ originalConfig.getNecessaryValue(Key.ADS_TABLE,
+ AdsWriterErrorCode.REQUIRED_VALUE);
+ Boolean overwrite = originalConfig.getBool(Key.OVER_WRITE);
+ if (overwrite == null) {
+ throw DataXException.asDataXException(AdsWriterErrorCode.REQUIRED_VALUE, "[overWrite] is a required configuration item.");
+ }
+ }
+ if (Constant.STREAMMODE.equalsIgnoreCase(writeMode)) {
+ originalConfig.getNecessaryValue(Key.OPIndex, AdsWriterErrorCode.REQUIRED_VALUE);
+ }
+ }
+
+ /* Create an AdsHelper instance.
+ * */
+ public static AdsHelper createAdsHelper(Configuration originalConfig){
+ //read adsUrl, userName, password, schema, etc. and create the AdsHelper instance
+ String adsUrl = originalConfig.getString(Key.ADS_URL);
+ String userName = originalConfig.getString(Key.USERNAME);
+ String password = originalConfig.getString(Key.PASSWORD);
+ String schema = originalConfig.getString(Key.SCHEMA);
+ Long socketTimeout = originalConfig.getLong(Key.SOCKET_TIMEOUT, Constant.DEFAULT_SOCKET_TIMEOUT);
+ String suffix = originalConfig.getString(Key.JDBC_URL_SUFFIX, "");
+ return new AdsHelper(adsUrl,userName,password,schema,socketTimeout,suffix);
+ }
+
+ public static AdsHelper createAdsHelperWithOdpsAccount(Configuration originalConfig) {
+ String adsUrl = originalConfig.getString(Key.ADS_URL);
+ String userName = originalConfig.getString(TransferProjectConf.KEY_ACCESS_ID);
+ String password = originalConfig.getString(TransferProjectConf.KEY_ACCESS_KEY);
+ String schema = originalConfig.getString(Key.SCHEMA);
+ Long socketTimeout = originalConfig.getLong(Key.SOCKET_TIMEOUT, Constant.DEFAULT_SOCKET_TIMEOUT);
+ String suffix = originalConfig.getString(Key.JDBC_URL_SUFFIX, "");
+ return new AdsHelper(adsUrl, userName, password, schema,socketTimeout,suffix);
+ }
+
+ /* Generate the configuration required by the ODPSWriter plugin.
+ * */
+ public static Configuration generateConf(Configuration originalConfig, String odpsTableName, TableMeta tableMeta, TransferProjectConf transConf){
+ Configuration newConfig = originalConfig.clone();
+ newConfig.set(Key.ODPSTABLENAME, odpsTableName);
+ newConfig.set(Key.ODPS_SERVER, transConf.getOdpsServer());
+ newConfig.set(Key.TUNNEL_SERVER,transConf.getOdpsTunnel());
+ newConfig.set(Key.ACCESS_ID,transConf.getAccessId());
+ newConfig.set(Key.ACCESS_KEY,transConf.getAccessKey());
+ newConfig.set(Key.PROJECT,transConf.getProject());
+ newConfig.set(Key.TRUNCATE, true);
+ newConfig.set(Key.PARTITION,null);
+// newConfig.remove(Key.PARTITION);
+ List cols = tableMeta.getCols();
+ List allColumns = new ArrayList();
+ if(cols != null && !cols.isEmpty()){
+ for(FieldSchema col:cols){
+ allColumns.add(col.getName());
+ }
+ }
+ newConfig.set(Key.COLUMN,allColumns);
+ return newConfig;
+ }
+
+ /* Build the source_path used when importing data into ADS.
+ * */
+ public static String generateSourcePath(String project, String tmpOdpsTableName, String odpsPartition){
+ StringBuilder builder = new StringBuilder();
+ String partition = transferOdpsPartitionToAds(odpsPartition);
+ builder.append("odps://").append(project).append("/").append(tmpOdpsTableName);
+ if(odpsPartition != null && !odpsPartition.isEmpty()){
+ builder.append("/").append(partition);
+ }
+ return builder.toString();
+ }
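+ // Illustrative example (hypothetical names): generateSourcePath("my_project", "tmp_t", "pt='20150101'")
+ // yields "odps://my_project/tmp_t/pt=20150101".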
+
+ public static String transferOdpsPartitionToAds(String odpsPartition){
+ if (odpsPartition == null || odpsPartition.isEmpty()) {
+ return null;
+ }
+ String adsPartition = formatPartition(odpsPartition);
+ String[] partitions = adsPartition.split("/");
+ for(int last = partitions.length; last > 0; last--){
+
+ String partitionPart = partitions[last-1];
+ String newPart = partitionPart.replace(".*", "*").replace("*", ".*");
+ if(newPart.split("=")[1].equals(".*")){
+ adsPartition = adsPartition.substring(0,adsPartition.length()-partitionPart.length());
+ }else{
+ break;
+ }
+ if(adsPartition.endsWith("/")){
+ adsPartition = adsPartition.substring(0,adsPartition.length()-1);
+ }
+ }
+ if (adsPartition.contains("*"))
+ throw DataXException.asDataXException(AdsWriterErrorCode.ODPS_PARTITION_FAILED, "");
+ return adsPartition;
+ }
+
+ public static String formatPartition(String partition) {
+ return partition.trim().replaceAll(" *= *", "=")
+ .replaceAll(" */ *", ",").replaceAll(" *, *", ",")
+ .replaceAll("'", "").replaceAll(",", "/");
+ }
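+ // Worked example: transferOdpsPartitionToAds("pt='20150101', ds='*'") first normalizes the
+ // spec via formatPartition to "pt=20150101/ds=*", then strips trailing wildcard levels and
+ // returns "pt=20150101"; a wildcard in a non-trailing level survives the loop and triggers
+ // ODPS_PARTITION_FAILED.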
+
+ public static String prepareJdbcUrl(Configuration conf) {
+ String adsURL = conf.getString(Key.ADS_URL);
+ String schema = conf.getString(Key.SCHEMA);
+ Long socketTimeout = conf.getLong(Key.SOCKET_TIMEOUT,
+ Constant.DEFAULT_SOCKET_TIMEOUT);
+ String suffix = conf.getString(Key.JDBC_URL_SUFFIX, "");
+ return AdsUtil.prepareJdbcUrl(adsURL, schema, socketTimeout, suffix);
+ }
+
+ public static String prepareJdbcUrl(String adsURL, String schema,
+ Long socketTimeout, String suffix) {
+ String jdbcUrl = null;
+ // like autoReconnect=true&failOverReadOnly=false&maxReconnects=10
+ if (StringUtils.isNotBlank(suffix)) {
+ jdbcUrl = String
+ .format("jdbc:mysql://%s/%s?useUnicode=true&characterEncoding=UTF-8&socketTimeout=%s&%s",
+ adsURL, schema, socketTimeout, suffix);
+ } else {
+ jdbcUrl = String
+ .format("jdbc:mysql://%s/%s?useUnicode=true&characterEncoding=UTF-8&socketTimeout=%s",
+ adsURL, schema, socketTimeout);
+ }
+ return jdbcUrl;
+ }
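+ // Illustrative example (hypothetical host): prepareJdbcUrl("ads-host:3306", "my_schema", 3600000L, "autoReconnect=true")
+ // yields "jdbc:mysql://ads-host:3306/my_schema?useUnicode=true&characterEncoding=UTF-8&socketTimeout=3600000&autoReconnect=true".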
+
+ public static Connection getAdsConnect(Configuration conf) {
+ String userName = conf.getString(Key.USERNAME);
+ String passWord = conf.getString(Key.PASSWORD);
+ String jdbcUrl = AdsUtil.prepareJdbcUrl(conf);
+ Connection connection = DBUtil.getConnection(DataBaseType.ADS, jdbcUrl, userName, passWord);
+ return connection;
+ }
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/Constant.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/Constant.java
new file mode 100644
index 0000000000..f0ab71ec18
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/Constant.java
@@ -0,0 +1,29 @@
+package com.alibaba.datax.plugin.writer.adswriter.util;
+
+public class Constant {
+
+ public static final String LOADMODE = "load";
+
+ public static final String INSERTMODE = "insert";
+
+ public static final String DELETEMODE = "delete";
+
+ public static final String REPLACEMODE = "replace";
+
+ public static final String STREAMMODE = "stream";
+
+ public static final int DEFAULT_BATCH_SIZE = 32;
+
+ public static final long DEFAULT_SOCKET_TIMEOUT = 3600000L;
+
+ public static final int DEFAULT_RETRY_TIMES = 2;
+
+ public static final String INSERT_TEMPLATE = "insert into %s ( %s ) values ";
+
+ public static final String DELETE_TEMPLATE = "delete from %s where ";
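+ // Illustrative expansion (hypothetical table): INSERT_TEMPLATE becomes
+ // "insert into my_table ( c1,c2 ) values " and DELETE_TEMPLATE becomes
+ // "delete from my_table where ", to which per-record value lists and predicate
+ // groups are then appended.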
+
+ public static final String ADS_TABLE_INFO = "adsTableInfo";
+
+ public static final String ADS_QUOTE_CHARACTER = "`";
+
+}
diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/Key.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/Key.java
new file mode 100644
index 0000000000..3d31c8186f
--- /dev/null
+++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/Key.java
@@ -0,0 +1,62 @@
+package com.alibaba.datax.plugin.writer.adswriter.util;
+
+
+public final class Key {
+
+ public final static String ADS_URL = "url";
+
+ public final static String USERNAME = "username";
+
+ public final static String PASSWORD = "password";
+
+ public final static String SCHEMA = "schema";
+
+ public final static String ADS_TABLE = "table";
+
+ public final static String Life_CYCLE = "lifeCycle";
+
+ public final static String OVER_WRITE = "overWrite";
+
+ public final static String WRITE_MODE = "writeMode";
+
+
+ public final static String COLUMN = "column";
+
+ public final static String OPIndex = "opIndex";
+
+ public final static String EMPTY_AS_NULL = "emptyAsNull";
+
+ public final static String BATCH_SIZE = "batchSize";
+
+ public final static String BUFFER_SIZE = "bufferSize";
+
+ public final static String PRE_SQL = "preSql";
+
+ public final static String POST_SQL = "postSql";
+
+ public final static String SOCKET_TIMEOUT = "socketTimeout";
+
+ public final static String RETRY_CONNECTION_TIME = "retryTimes";
+
+ public final static String JDBC_URL_SUFFIX = "urlSuffix";
+
+ /**
+ * The keys below belong to the ODPS writer.
+ */
+ public final static String PARTITION = "partition";
+
+ public final static String ODPSTABLENAME = "table";
+
+ public final static String ODPS_SERVER = "odpsServer";
+
+ public final static String TUNNEL_SERVER = "tunnelServer";
+
+ public final static String ACCESS_ID = "accessId";
+
+ public final static String ACCESS_KEY = "accessKey";
+
+ public final static String PROJECT = "project";
+
+ public final static String TRUNCATE = "truncate";
+
+}
\ No newline at end of file
diff --git a/adswriter/src/main/resources/plugin.json b/adswriter/src/main/resources/plugin.json
new file mode 100644
index 0000000000..a70fb36462
--- /dev/null
+++ b/adswriter/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+ "name": "adswriter",
+ "class": "com.alibaba.datax.plugin.writer.adswriter.AdsWriter",
+ "description": "",
+ "developer": "alibaba"
+}
\ No newline at end of file
diff --git a/adswriter/src/main/resources/plugin_job_template.json b/adswriter/src/main/resources/plugin_job_template.json
new file mode 100644
index 0000000000..0753a226e8
--- /dev/null
+++ b/adswriter/src/main/resources/plugin_job_template.json
@@ -0,0 +1,13 @@
+{
+ "name": "adswriter",
+ "parameter": {
+ "url": "",
+ "username": "",
+ "password": "",
+ "schema": "",
+ "table": "",
+ "partition": "",
+ "overWrite": "",
+ "lifeCycle": 2
+ }
+}
\ No newline at end of file
diff --git a/common/pom.xml b/common/pom.xml
new file mode 100755
index 0000000000..6cce789f23
--- /dev/null
+++ b/common/pom.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>com.alibaba.datax</groupId>
+        <artifactId>datax-all</artifactId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>datax-common</artifactId>
+    <name>datax-common</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba</groupId>
+            <artifactId>fastjson</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>4.4</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>fluent-hc</artifactId>
+            <version>4.4</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-math3</artifactId>
+            <version>3.1.1</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>1.6</source>
+                    <target>1.6</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/common/src/main/java/com/alibaba/datax/common/base/BaseObject.java b/common/src/main/java/com/alibaba/datax/common/base/BaseObject.java
new file mode 100755
index 0000000000..e7d06a9503
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/base/BaseObject.java
@@ -0,0 +1,25 @@
+package com.alibaba.datax.common.base;
+
+import org.apache.commons.lang3.builder.EqualsBuilder;
+import org.apache.commons.lang3.builder.HashCodeBuilder;
+import org.apache.commons.lang3.builder.ToStringBuilder;
+import org.apache.commons.lang3.builder.ToStringStyle;
+
+public class BaseObject {
+
+ @Override
+ public int hashCode() {
+ return HashCodeBuilder.reflectionHashCode(this, false);
+ }
+
+ @Override
+ public boolean equals(Object object) {
+ return EqualsBuilder.reflectionEquals(this, object, false);
+ }
+
+ @Override
+ public String toString() {
+ return ToStringBuilder.reflectionToString(this,
+ ToStringStyle.MULTI_LINE_STYLE);
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/constant/CommonConstant.java b/common/src/main/java/com/alibaba/datax/common/constant/CommonConstant.java
new file mode 100755
index 0000000000..423e16f926
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/constant/CommonConstant.java
@@ -0,0 +1,9 @@
+package com.alibaba.datax.common.constant;
+
+public final class CommonConstant {
+    /**
+     * Lets a plugin tag each task of its own split with the resource that task uses, so that
+     * when core assembles the reader/writer tasks after splitting it can shuffle them more
+     * meaningfully according to the resource tag.
+     */
+ public static String LOAD_BALANCE_RESOURCE_MARK = "loadBalanceResourceMark";
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/constant/PluginType.java b/common/src/main/java/com/alibaba/datax/common/constant/PluginType.java
new file mode 100755
index 0000000000..ceee089e9e
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/constant/PluginType.java
@@ -0,0 +1,20 @@
+package com.alibaba.datax.common.constant;
+
+/**
+ * Created by jingxing on 14-8-31.
+ */
+public enum PluginType {
+    // pluginType also stands for the resource directory; it is hard to extend, or rather should only be extended when truly necessary. Mark Handler for now (it works just like transformer) and discuss later.
+ READER("reader"), TRANSFORMER("transformer"), WRITER("writer"), HANDLER("handler");
+
+ private String pluginType;
+
+ private PluginType(String pluginType) {
+ this.pluginType = pluginType;
+ }
+
+ @Override
+ public String toString() {
+ return this.pluginType;
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java b/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java
new file mode 100755
index 0000000000..7699e152ae
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java
@@ -0,0 +1,115 @@
+package com.alibaba.datax.common.element;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Date;
+
+/**
+ * Created by jingxing on 14-8-24.
+ */
+public class BoolColumn extends Column {
+
+ public BoolColumn(Boolean bool) {
+ super(bool, Column.Type.BOOL, 1);
+ }
+
+ public BoolColumn(final String data) {
+ this(true);
+ this.validate(data);
+ if (null == data) {
+ this.setRawData(null);
+ this.setByteSize(0);
+ } else {
+ this.setRawData(Boolean.valueOf(data));
+ this.setByteSize(1);
+ }
+ return;
+ }
+
+ public BoolColumn() {
+ super(null, Column.Type.BOOL, 1);
+ }
+
+ @Override
+ public Boolean asBoolean() {
+ if (null == super.getRawData()) {
+ return null;
+ }
+
+ return (Boolean) super.getRawData();
+ }
+
+ @Override
+ public Long asLong() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return this.asBoolean() ? 1L : 0L;
+ }
+
+ @Override
+ public Double asDouble() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return this.asBoolean() ? 1.0d : 0.0d;
+ }
+
+ @Override
+ public String asString() {
+ if (null == super.getRawData()) {
+ return null;
+ }
+
+ return this.asBoolean() ? "true" : "false";
+ }
+
+ @Override
+ public BigInteger asBigInteger() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return BigInteger.valueOf(this.asLong());
+ }
+
+ @Override
+ public BigDecimal asBigDecimal() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return BigDecimal.valueOf(this.asLong());
+ }
+
+ @Override
+ public Date asDate() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Bool类型不能转为Date .");
+ }
+
+ @Override
+ public byte[] asBytes() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Boolean类型不能转为Bytes .");
+ }
+
+ private void validate(final String data) {
+ if (null == data) {
+ return;
+ }
+
+ if ("true".equalsIgnoreCase(data) || "false".equalsIgnoreCase(data)) {
+ return;
+ }
+
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("String[%s]不能转为Bool .", data));
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java b/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java
new file mode 100755
index 0000000000..d3cc599361
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java
@@ -0,0 +1,84 @@
+package com.alibaba.datax.common.element;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+import org.apache.commons.lang3.ArrayUtils;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Date;
+
+/**
+ * Created by jingxing on 14-8-24.
+ */
+public class BytesColumn extends Column {
+
+ public BytesColumn() {
+ this(null);
+ }
+
+ public BytesColumn(byte[] bytes) {
+ super(ArrayUtils.clone(bytes), Column.Type.BYTES, null == bytes ? 0
+ : bytes.length);
+ }
+
+ @Override
+ public byte[] asBytes() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return (byte[]) this.getRawData();
+ }
+
+ @Override
+ public String asString() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ try {
+ return ColumnCast.bytes2String(this);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("Bytes[%s]不能转为String .", this.toString()));
+ }
+ }
+
+ @Override
+ public Long asLong() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Long .");
+ }
+
+ @Override
+ public BigDecimal asBigDecimal() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为BigDecimal .");
+ }
+
+ @Override
+ public BigInteger asBigInteger() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为BigInteger .");
+ }
+
+ @Override
+ public Double asDouble() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Long .");
+ }
+
+ @Override
+ public Date asDate() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Date .");
+ }
+
+ @Override
+ public Boolean asBoolean() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Boolean .");
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/element/Column.java b/common/src/main/java/com/alibaba/datax/common/element/Column.java
new file mode 100755
index 0000000000..ed68e88d6b
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/Column.java
@@ -0,0 +1,75 @@
+package com.alibaba.datax.common.element;
+
+import com.alibaba.fastjson.JSON;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Date;
+
+/**
+ * Created by jingxing on 14-8-24.
+ *
+ */
+public abstract class Column {
+
+ private Type type;
+
+ private Object rawData;
+
+ private int byteSize;
+
+ public Column(final Object object, final Type type, int byteSize) {
+ this.rawData = object;
+ this.type = type;
+ this.byteSize = byteSize;
+ }
+
+ public Object getRawData() {
+ return this.rawData;
+ }
+
+ public Type getType() {
+ return this.type;
+ }
+
+ public int getByteSize() {
+ return this.byteSize;
+ }
+
+ protected void setType(Type type) {
+ this.type = type;
+ }
+
+ protected void setRawData(Object rawData) {
+ this.rawData = rawData;
+ }
+
+ protected void setByteSize(int byteSize) {
+ this.byteSize = byteSize;
+ }
+
+ public abstract Long asLong();
+
+ public abstract Double asDouble();
+
+ public abstract String asString();
+
+ public abstract Date asDate();
+
+ public abstract byte[] asBytes();
+
+ public abstract Boolean asBoolean();
+
+ public abstract BigDecimal asBigDecimal();
+
+ public abstract BigInteger asBigInteger();
+
+ @Override
+ public String toString() {
+ return JSON.toJSONString(this);
+ }
+
+ public enum Type {
+ BAD, NULL, INT, LONG, DOUBLE, STRING, BOOL, DATE, BYTES
+ }
+}
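
The contract above is uniform across the concrete columns in this changeset: a conversion returns null when the raw value is null, converts when the target type is supported, and throws DataXException with CONVERT_NOT_SUPPORT otherwise. A minimal sketch using BoolColumn:

```
import com.alibaba.datax.common.element.BoolColumn;
import com.alibaba.datax.common.element.Column;

public class ColumnContractDemo {
    public static void main(String[] args) {
        Column flag = new BoolColumn("true");
        System.out.println(flag.asBoolean()); // true
        System.out.println(flag.asLong());    // 1
        System.out.println(flag.asString());  // true

        Column empty = new BoolColumn();       // null raw data
        System.out.println(empty.asBoolean()); // null, not an exception

        try {
            flag.asDate(); // Bool -> Date is an unsupported conversion
        } catch (Exception e) {
            System.out.println(e.getMessage()); // Common-01 CONVERT_NOT_SUPPORT
        }
    }
}
```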
diff --git a/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java b/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java
new file mode 100755
index 0000000000..89d0a7c627
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java
@@ -0,0 +1,199 @@
+package com.alibaba.datax.common.element;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import org.apache.commons.lang3.time.DateFormatUtils;
+import org.apache.commons.lang3.time.FastDateFormat;
+
+import java.io.UnsupportedEncodingException;
+import java.text.ParseException;
+import java.util.*;
+
+public final class ColumnCast {
+
+ public static void bind(final Configuration configuration) {
+ StringCast.init(configuration);
+ DateCast.init(configuration);
+ BytesCast.init(configuration);
+ }
+
+ public static Date string2Date(final StringColumn column)
+ throws ParseException {
+ return StringCast.asDate(column);
+ }
+
+ public static byte[] string2Bytes(final StringColumn column)
+ throws UnsupportedEncodingException {
+ return StringCast.asBytes(column);
+ }
+
+ public static String date2String(final DateColumn column) {
+ return DateCast.asString(column);
+ }
+
+ public static String bytes2String(final BytesColumn column)
+ throws UnsupportedEncodingException {
+ return BytesCast.asString(column);
+ }
+}
+
+class StringCast {
+ static String datetimeFormat = "yyyy-MM-dd HH:mm:ss";
+
+ static String dateFormat = "yyyy-MM-dd";
+
+ static String timeFormat = "HH:mm:ss";
+
+    static List<String> extraFormats = Collections.emptyList();
+
+ static String timeZone = "GMT+8";
+
+ static FastDateFormat dateFormatter;
+
+ static FastDateFormat timeFormatter;
+
+ static FastDateFormat datetimeFormatter;
+
+ static TimeZone timeZoner;
+
+ static String encoding = "UTF-8";
+
+ static void init(final Configuration configuration) {
+ StringCast.datetimeFormat = configuration.getString(
+ "common.column.datetimeFormat", StringCast.datetimeFormat);
+ StringCast.dateFormat = configuration.getString(
+ "common.column.dateFormat", StringCast.dateFormat);
+ StringCast.timeFormat = configuration.getString(
+ "common.column.timeFormat", StringCast.timeFormat);
+ StringCast.extraFormats = configuration.getList(
+ "common.column.extraFormats", Collections.emptyList(), String.class);
+
+ StringCast.timeZone = configuration.getString("common.column.timeZone",
+ StringCast.timeZone);
+ StringCast.timeZoner = TimeZone.getTimeZone(StringCast.timeZone);
+
+ StringCast.datetimeFormatter = FastDateFormat.getInstance(
+ StringCast.datetimeFormat, StringCast.timeZoner);
+ StringCast.dateFormatter = FastDateFormat.getInstance(
+ StringCast.dateFormat, StringCast.timeZoner);
+ StringCast.timeFormatter = FastDateFormat.getInstance(
+ StringCast.timeFormat, StringCast.timeZoner);
+
+ StringCast.encoding = configuration.getString("common.column.encoding",
+ StringCast.encoding);
+ }
+
+ static Date asDate(final StringColumn column) throws ParseException {
+ if (null == column.asString()) {
+ return null;
+ }
+
+ try {
+ return StringCast.datetimeFormatter.parse(column.asString());
+ } catch (ParseException ignored) {
+ }
+
+ try {
+ return StringCast.dateFormatter.parse(column.asString());
+ } catch (ParseException ignored) {
+ }
+
+ ParseException e;
+ try {
+ return StringCast.timeFormatter.parse(column.asString());
+ } catch (ParseException ignored) {
+ e = ignored;
+ }
+
+ for (String format : StringCast.extraFormats) {
+ try{
+ return FastDateFormat.getInstance(format, StringCast.timeZoner).parse(column.asString());
+ } catch (ParseException ignored){
+ e = ignored;
+ }
+ }
+ throw e;
+ }
+
+ static byte[] asBytes(final StringColumn column)
+ throws UnsupportedEncodingException {
+ if (null == column.asString()) {
+ return null;
+ }
+
+ return column.asString().getBytes(StringCast.encoding);
+ }
+}
+
+/**
+ * Going forward, for maintainability, consider using Apache's DateFormatUtils directly.
+ *
+ * 迟南 has already fixed this issue, but for maintainability we still use Apache's built-in functions directly.
+ */
+class DateCast {
+
+ static String datetimeFormat = "yyyy-MM-dd HH:mm:ss";
+
+ static String dateFormat = "yyyy-MM-dd";
+
+ static String timeFormat = "HH:mm:ss";
+
+ static String timeZone = "GMT+8";
+
+ static TimeZone timeZoner = TimeZone.getTimeZone(DateCast.timeZone);
+
+ static void init(final Configuration configuration) {
+ DateCast.datetimeFormat = configuration.getString(
+ "common.column.datetimeFormat", datetimeFormat);
+ DateCast.timeFormat = configuration.getString(
+ "common.column.timeFormat", timeFormat);
+ DateCast.dateFormat = configuration.getString(
+ "common.column.dateFormat", dateFormat);
+ DateCast.timeZone = configuration.getString("common.column.timeZone",
+ DateCast.timeZone);
+ DateCast.timeZoner = TimeZone.getTimeZone(DateCast.timeZone);
+ return;
+ }
+
+ static String asString(final DateColumn column) {
+ if (null == column.asDate()) {
+ return null;
+ }
+
+ switch (column.getSubType()) {
+ case DATE:
+ return DateFormatUtils.format(column.asDate(), DateCast.dateFormat,
+ DateCast.timeZoner);
+ case TIME:
+ return DateFormatUtils.format(column.asDate(), DateCast.timeFormat,
+ DateCast.timeZoner);
+ case DATETIME:
+ return DateFormatUtils.format(column.asDate(),
+ DateCast.datetimeFormat, DateCast.timeZoner);
+ default:
+ throw DataXException
+ .asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
+ "时间类型出现不支持类型,目前仅支持DATE/TIME/DATETIME。该类型属于编程错误,请反馈给DataX开发团队 .");
+ }
+ }
+}
+
+class BytesCast {
+ static String encoding = "utf-8";
+
+ static void init(final Configuration configuration) {
+ BytesCast.encoding = configuration.getString("common.column.encoding",
+ BytesCast.encoding);
+ return;
+ }
+
+ static String asString(final BytesColumn column)
+ throws UnsupportedEncodingException {
+ if (null == column.asBytes()) {
+ return null;
+ }
+
+ return new String(column.asBytes(), encoding);
+ }
+}
\ No newline at end of file
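
A sketch of how the cast layer is wired up and how the fallback chain in StringCast.asDate behaves; it assumes the Configuration.from(String) JSON factory from datax-common's util package (not shown in this hunk):

```
import com.alibaba.datax.common.element.ColumnCast;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.util.Configuration;

public class ColumnCastDemo {
    public static void main(String[] args) throws Exception {
        // extraFormats gives string2Date additional patterns to fall back on
        Configuration conf = Configuration.from(
                "{\"common\": {\"column\": {\"extraFormats\": [\"yyyy/MM/dd\"]}}}");
        ColumnCast.bind(conf);

        // Tried in order: datetime, date, time, then each extra format
        System.out.println(ColumnCast.string2Date(new StringColumn("2015/08/23")));
    }
}
```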
diff --git a/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java
new file mode 100755
index 0000000000..6626a6fbdd
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java
@@ -0,0 +1,130 @@
+package com.alibaba.datax.common.element;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Date;
+
+/**
+ * Created by jingxing on 14-8-24.
+ */
+public class DateColumn extends Column {
+
+ private DateType subType = DateType.DATETIME;
+
+ public static enum DateType {
+ DATE, TIME, DATETIME
+ }
+
+    /**
+     * Builds a DateColumn holding null, with date subtype DATETIME.
+     */
+ public DateColumn() {
+ this((Long)null);
+ }
+
+    /**
+     * Builds a DateColumn from stamp (a Unix timestamp), with date subtype DATETIME.
+     * Internally the date is stored as a long of milliseconds to save space.
+     */
+ public DateColumn(final Long stamp) {
+ super(stamp, Column.Type.DATE, (null == stamp ? 0 : 8));
+ }
+
+    /**
+     * Builds a DateColumn from date (java.util.Date), with date subtype DATETIME.
+     */
+ public DateColumn(final Date date) {
+ this(date == null ? null : date.getTime());
+ }
+
+    /**
+     * Builds a DateColumn from date (java.sql.Date), with date subtype DATE: date only, no time.
+     */
+ public DateColumn(final java.sql.Date date) {
+ this(date == null ? null : date.getTime());
+ this.setSubType(DateType.DATE);
+ }
+
+    /**
+     * Builds a DateColumn from time (java.sql.Time), with date subtype TIME: time only, no date.
+     */
+ public DateColumn(final java.sql.Time time) {
+ this(time == null ? null : time.getTime());
+ this.setSubType(DateType.TIME);
+ }
+
+    /**
+     * Builds a DateColumn from ts (java.sql.Timestamp), with date subtype DATETIME.
+     */
+ public DateColumn(final java.sql.Timestamp ts) {
+ this(ts == null ? null : ts.getTime());
+ this.setSubType(DateType.DATETIME);
+ }
+
+ @Override
+ public Long asLong() {
+
+ return (Long)this.getRawData();
+ }
+
+ @Override
+ public String asString() {
+ try {
+ return ColumnCast.date2String(this);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("Date[%s]类型不能转为String .", this.toString()));
+ }
+ }
+
+ @Override
+ public Date asDate() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return new Date((Long)this.getRawData());
+ }
+
+ @Override
+ public byte[] asBytes() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Date类型不能转为Bytes .");
+ }
+
+ @Override
+ public Boolean asBoolean() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Date类型不能转为Boolean .");
+ }
+
+ @Override
+ public Double asDouble() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Date类型不能转为Double .");
+ }
+
+ @Override
+ public BigInteger asBigInteger() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Date类型不能转为BigInteger .");
+ }
+
+ @Override
+ public BigDecimal asBigDecimal() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Date类型不能转为BigDecimal .");
+ }
+
+ public DateType getSubType() {
+ return subType;
+ }
+
+ public void setSubType(DateType subType) {
+ this.subType = subType;
+ }
+}
\ No newline at end of file
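
The subtype decides which of the three DateCast patterns is applied when the column is rendered back to a string; a short illustration with the defaults:

```
import com.alibaba.datax.common.element.DateColumn;

public class DateColumnDemo {
    public static void main(String[] args) {
        long now = System.currentTimeMillis();

        DateColumn date = new DateColumn(new java.sql.Date(now));      // subtype DATE
        DateColumn time = new DateColumn(new java.sql.Time(now));      // subtype TIME
        DateColumn ts   = new DateColumn(new java.sql.Timestamp(now)); // subtype DATETIME

        System.out.println(date.asString()); // e.g. 2015-08-23
        System.out.println(time.asString()); // e.g. 20:15:42
        System.out.println(ts.asString());   // e.g. 2015-08-23 20:15:42
    }
}
```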
diff --git a/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java b/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java
new file mode 100755
index 0000000000..17170ea6c4
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java
@@ -0,0 +1,161 @@
+package com.alibaba.datax.common.element;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Date;
+
+public class DoubleColumn extends Column {
+
+ public DoubleColumn(final String data) {
+ this(data, null == data ? 0 : data.length());
+ this.validate(data);
+ }
+
+ public DoubleColumn(Long data) {
+ this(data == null ? (String) null : String.valueOf(data));
+ }
+
+ public DoubleColumn(Integer data) {
+ this(data == null ? (String) null : String.valueOf(data));
+ }
+
+    /**
+     * A double cannot represent decimal data exactly; we do not recommend this constructor
+     * for Double data. Pass a String instead (a short demo follows this file).
+     */
+ public DoubleColumn(final Double data) {
+ this(data == null ? (String) null
+ : new BigDecimal(String.valueOf(data)).toPlainString());
+ }
+
+    /**
+     * A float cannot represent decimal data exactly; we do not recommend this constructor
+     * for Float data. Pass a String instead.
+     */
+ public DoubleColumn(final Float data) {
+ this(data == null ? (String) null
+ : new BigDecimal(String.valueOf(data)).toPlainString());
+ }
+
+ public DoubleColumn(final BigDecimal data) {
+ this(null == data ? (String) null : data.toPlainString());
+ }
+
+ public DoubleColumn(final BigInteger data) {
+ this(null == data ? (String) null : data.toString());
+ }
+
+ public DoubleColumn() {
+ this((String) null);
+ }
+
+ private DoubleColumn(final String data, int byteSize) {
+ super(data, Column.Type.DOUBLE, byteSize);
+ }
+
+ @Override
+ public BigDecimal asBigDecimal() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ try {
+ return new BigDecimal((String) this.getRawData());
+ } catch (NumberFormatException e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("String[%s] 无法转换为Double类型 .",
+ (String) this.getRawData()));
+ }
+ }
+
+ @Override
+ public Double asDouble() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ String string = (String) this.getRawData();
+
+        // validate() also accepts "Infinity" (no sign), so treat it as special here too
+        boolean isDoubleSpecific = string.equals("NaN")
+                || string.equals("-Infinity") || string.equals("+Infinity")
+                || string.equals("Infinity");
+ if (isDoubleSpecific) {
+ return Double.valueOf(string);
+ }
+
+ BigDecimal result = this.asBigDecimal();
+ OverFlowUtil.validateDoubleNotOverFlow(result);
+
+ return result.doubleValue();
+ }
+
+ @Override
+ public Long asLong() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ BigDecimal result = this.asBigDecimal();
+ OverFlowUtil.validateLongNotOverFlow(result.toBigInteger());
+
+ return result.longValue();
+ }
+
+ @Override
+ public BigInteger asBigInteger() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return this.asBigDecimal().toBigInteger();
+ }
+
+ @Override
+ public String asString() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+ return (String) this.getRawData();
+ }
+
+ @Override
+ public Boolean asBoolean() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Bool .");
+ }
+
+ @Override
+ public Date asDate() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Date类型 .");
+ }
+
+ @Override
+ public byte[] asBytes() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Bytes类型 .");
+ }
+
+ private void validate(final String data) {
+ if (null == data) {
+ return;
+ }
+
+ if (data.equalsIgnoreCase("NaN") || data.equalsIgnoreCase("-Infinity")
+ || data.equalsIgnoreCase("Infinity")) {
+ return;
+ }
+
+ try {
+ new BigDecimal(data);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("String[%s]无法转为Double类型 .", data));
+ }
+ }
+
+}
\ No newline at end of file
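
Why the String constructor is preferred: a double literal has already lost precision before the column ever sees it, while a String keeps the written decimal exactly. A small demonstration:

```
import java.math.BigDecimal;

import com.alibaba.datax.common.element.DoubleColumn;

public class DoubleColumnDemo {
    public static void main(String[] args) {
        // The raw binary double is only an approximation of 0.1
        System.out.println(new BigDecimal(0.1));
        // prints 0.1000000000000000055511151231257827021181583404541015625

        // The String-based column keeps exactly what was written
        System.out.println(new DoubleColumn("0.1").asBigDecimal()); // 0.1

        // The Double constructor routes through String.valueOf, so it keeps
        // the printed form rather than the raw bits
        System.out.println(new DoubleColumn(Double.valueOf(0.1)).asBigDecimal()); // 0.1
    }
}
```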
diff --git a/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java b/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java
new file mode 100755
index 0000000000..d8113f7c05
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java
@@ -0,0 +1,135 @@
+package com.alibaba.datax.common.element;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+import org.apache.commons.lang3.math.NumberUtils;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Date;
+
+public class LongColumn extends Column {
+
+    /**
+     * Builds a LongColumn from an integer string representation; Java scientific notation
+     * is supported.
+     *
+     * NOTE: if data is the string form of a floating-point number, precision will be lost;
+     * use DoubleColumn for floating-point strings. (A short demo follows this file.)
+     */
+ public LongColumn(final String data) {
+ super(null, Column.Type.LONG, 0);
+ if (null == data) {
+ return;
+ }
+
+ try {
+ BigInteger rawData = NumberUtils.createBigDecimal(data)
+ .toBigInteger();
+ super.setRawData(rawData);
+
+            // When rawData is in [0, 127], rawData.bitLength() < 8 would make byteSize 0; for simplicity, just use data.length()
+ // super.setByteSize(rawData.bitLength() / 8);
+ super.setByteSize(data.length());
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("String[%s]不能转为Long .", data));
+ }
+ }
+
+ public LongColumn(Long data) {
+ this(null == data ? (BigInteger) null : BigInteger.valueOf(data));
+ }
+
+ public LongColumn(Integer data) {
+ this(null == data ? (BigInteger) null : BigInteger.valueOf(data));
+ }
+
+ public LongColumn(BigInteger data) {
+ this(data, null == data ? 0 : 8);
+ }
+
+ private LongColumn(BigInteger data, int byteSize) {
+ super(data, Column.Type.LONG, byteSize);
+ }
+
+ public LongColumn() {
+ this((BigInteger) null);
+ }
+
+ @Override
+ public BigInteger asBigInteger() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return (BigInteger) this.getRawData();
+ }
+
+ @Override
+ public Long asLong() {
+ BigInteger rawData = (BigInteger) this.getRawData();
+ if (null == rawData) {
+ return null;
+ }
+
+ OverFlowUtil.validateLongNotOverFlow(rawData);
+
+ return rawData.longValue();
+ }
+
+ @Override
+ public Double asDouble() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ BigDecimal decimal = this.asBigDecimal();
+ OverFlowUtil.validateDoubleNotOverFlow(decimal);
+
+ return decimal.doubleValue();
+ }
+
+ @Override
+ public Boolean asBoolean() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+        return this.asBigInteger().compareTo(BigInteger.ZERO) != 0;
+ }
+
+ @Override
+ public BigDecimal asBigDecimal() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return new BigDecimal(this.asBigInteger());
+ }
+
+ @Override
+ public String asString() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+ return ((BigInteger) this.getRawData()).toString();
+ }
+
+ @Override
+ public Date asDate() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+ return new Date(this.asLong());
+ }
+
+ @Override
+ public byte[] asBytes() {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, "Long类型不能转为Bytes .");
+ }
+
+}
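
A quick demonstration of the integer-string parsing, including the scientific notation the Javadoc mentions; fractional input is truncated, which is exactly the distortion the NOTE warns about:

```
import com.alibaba.datax.common.element.LongColumn;

public class LongColumnDemo {
    public static void main(String[] args) {
        System.out.println(new LongColumn("123").asLong());    // 123
        System.out.println(new LongColumn("1.23E3").asLong()); // 1230, scientific notation
        System.out.println(new LongColumn("3.99").asLong());   // 3, the fraction is dropped
    }
}
```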
diff --git a/common/src/main/java/com/alibaba/datax/common/element/OverFlowUtil.java b/common/src/main/java/com/alibaba/datax/common/element/OverFlowUtil.java
new file mode 100755
index 0000000000..39460c7ebc
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/OverFlowUtil.java
@@ -0,0 +1,62 @@
+package com.alibaba.datax.common.element;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+
+public final class OverFlowUtil {
+ public static final BigInteger MAX_LONG = BigInteger
+ .valueOf(Long.MAX_VALUE);
+
+ public static final BigInteger MIN_LONG = BigInteger
+ .valueOf(Long.MIN_VALUE);
+
+ public static final BigDecimal MIN_DOUBLE_POSITIVE = new BigDecimal(
+ String.valueOf(Double.MIN_VALUE));
+
+ public static final BigDecimal MAX_DOUBLE_POSITIVE = new BigDecimal(
+ String.valueOf(Double.MAX_VALUE));
+
+ public static boolean isLongOverflow(final BigInteger integer) {
+ return (integer.compareTo(OverFlowUtil.MAX_LONG) > 0 || integer
+ .compareTo(OverFlowUtil.MIN_LONG) < 0);
+
+ }
+
+ public static void validateLongNotOverFlow(final BigInteger integer) {
+ boolean isOverFlow = OverFlowUtil.isLongOverflow(integer);
+
+ if (isOverFlow) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_OVER_FLOW,
+ String.format("[%s] 转为Long类型出现溢出 .", integer.toString()));
+ }
+ }
+
+ public static boolean isDoubleOverFlow(final BigDecimal decimal) {
+ if (decimal.signum() == 0) {
+ return false;
+ }
+
+ BigDecimal newDecimal = decimal;
+ boolean isPositive = decimal.signum() == 1;
+ if (!isPositive) {
+ newDecimal = decimal.negate();
+ }
+
+ return (newDecimal.compareTo(MIN_DOUBLE_POSITIVE) < 0 || newDecimal
+ .compareTo(MAX_DOUBLE_POSITIVE) > 0);
+ }
+
+ public static void validateDoubleNotOverFlow(final BigDecimal decimal) {
+ boolean isOverFlow = OverFlowUtil.isDoubleOverFlow(decimal);
+ if (isOverFlow) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_OVER_FLOW,
+ String.format("[%s]转为Double类型出现溢出 .",
+ decimal.toPlainString()));
+ }
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/element/Record.java b/common/src/main/java/com/alibaba/datax/common/element/Record.java
new file mode 100755
index 0000000000..d06d80aafb
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/Record.java
@@ -0,0 +1,23 @@
+package com.alibaba.datax.common.element;
+
+/**
+ * Created by jingxing on 14-8-24.
+ */
+
+public interface Record {
+
+ public void addColumn(Column column);
+
+ public void setColumn(int i, final Column column);
+
+ public Column getColumn(int i);
+
+ public String toString();
+
+ public int getColumnNumber();
+
+ public int getByteSize();
+
+ public int getMemorySize();
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java b/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java
new file mode 100755
index 0000000000..11209f4688
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java
@@ -0,0 +1,163 @@
+package com.alibaba.datax.common.element;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Date;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+
+/**
+ * Created by jingxing on 14-8-24.
+ */
+
+public class StringColumn extends Column {
+
+ public StringColumn() {
+ this((String) null);
+ }
+
+ public StringColumn(final String rawData) {
+ super(rawData, Column.Type.STRING, (null == rawData ? 0 : rawData
+ .length()));
+ }
+
+ @Override
+ public String asString() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ return (String) this.getRawData();
+ }
+
+ private void validateDoubleSpecific(final String data) {
+ if ("NaN".equals(data) || "Infinity".equals(data)
+ || "-Infinity".equals(data)) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("String[\"%s\"]属于Double特殊类型,不能转为其他类型 .", data));
+ }
+
+ return;
+ }
+
+ @Override
+ public BigInteger asBigInteger() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ this.validateDoubleSpecific((String) this.getRawData());
+
+ try {
+ return this.asBigDecimal().toBigInteger();
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, String.format(
+ "String[\"%s\"]不能转为BigInteger .", this.asString()));
+ }
+ }
+
+ @Override
+ public Long asLong() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ this.validateDoubleSpecific((String) this.getRawData());
+
+ try {
+ BigInteger integer = this.asBigInteger();
+ OverFlowUtil.validateLongNotOverFlow(integer);
+ return integer.longValue();
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("String[\"%s\"]不能转为Long .", this.asString()));
+ }
+ }
+
+ @Override
+ public BigDecimal asBigDecimal() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ this.validateDoubleSpecific((String) this.getRawData());
+
+ try {
+ return new BigDecimal(this.asString());
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT, String.format(
+ "String [\"%s\"] 不能转为BigDecimal .", this.asString()));
+ }
+ }
+
+ @Override
+ public Double asDouble() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ String data = (String) this.getRawData();
+ if ("NaN".equals(data)) {
+ return Double.NaN;
+ }
+
+ if ("Infinity".equals(data)) {
+ return Double.POSITIVE_INFINITY;
+ }
+
+ if ("-Infinity".equals(data)) {
+ return Double.NEGATIVE_INFINITY;
+ }
+
+ BigDecimal decimal = this.asBigDecimal();
+ OverFlowUtil.validateDoubleNotOverFlow(decimal);
+
+ return decimal.doubleValue();
+ }
+
+ @Override
+ public Boolean asBoolean() {
+ if (null == this.getRawData()) {
+ return null;
+ }
+
+ if ("true".equalsIgnoreCase(this.asString())) {
+ return true;
+ }
+
+ if ("false".equalsIgnoreCase(this.asString())) {
+ return false;
+ }
+
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("String[\"%s\"]不能转为Bool .", this.asString()));
+ }
+
+ @Override
+ public Date asDate() {
+ try {
+ return ColumnCast.string2Date(this);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("String[\"%s\"]不能转为Date .", this.asString()));
+ }
+ }
+
+ @Override
+ public byte[] asBytes() {
+ try {
+ return ColumnCast.string2Bytes(this);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONVERT_NOT_SUPPORT,
+ String.format("String[\"%s\"]不能转为Bytes .", this.asString()));
+ }
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/exception/CommonErrorCode.java b/common/src/main/java/com/alibaba/datax/common/exception/CommonErrorCode.java
new file mode 100755
index 0000000000..8679ffb475
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/exception/CommonErrorCode.java
@@ -0,0 +1,45 @@
+package com.alibaba.datax.common.exception;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+
+/**
+ *
+ */
+public enum CommonErrorCode implements ErrorCode {
+
+ CONFIG_ERROR("Common-00", "您提供的配置文件存在错误信息,请检查您的作业配置 ."),
+ CONVERT_NOT_SUPPORT("Common-01", "同步数据出现业务脏数据情况,数据类型转换错误 ."),
+ CONVERT_OVER_FLOW("Common-02", "同步数据出现业务脏数据情况,数据类型转换溢出 ."),
+ RETRY_FAIL("Common-10", "方法调用多次仍旧失败 ."),
+ RUNTIME_ERROR("Common-11", "运行时内部调用错误 ."),
+ HOOK_INTERNAL_ERROR("Common-12", "Hook运行错误 ."),
+ SHUT_DOWN_TASK("Common-20", "Task收到了shutdown指令,为failover做准备"),
+ WAIT_TIME_EXCEED("Common-21", "等待时间超出范围"),
+ TASK_HUNG_EXPIRED("Common-22", "任务hung住,Expired");
+
+ private final String code;
+
+ private final String describe;
+
+ private CommonErrorCode(String code, String describe) {
+ this.code = code;
+ this.describe = describe;
+ }
+
+ @Override
+ public String getCode() {
+ return this.code;
+ }
+
+ @Override
+ public String getDescription() {
+ return this.describe;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("Code:[%s], Describe:[%s]", this.code,
+ this.describe);
+ }
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java b/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java
new file mode 100755
index 0000000000..f360e69900
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java
@@ -0,0 +1,62 @@
+package com.alibaba.datax.common.exception;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+public class DataXException extends RuntimeException {
+
+ private static final long serialVersionUID = 1L;
+
+ private ErrorCode errorCode;
+
+ public DataXException(ErrorCode errorCode, String errorMessage) {
+ super(errorCode.toString() + " - " + errorMessage);
+ this.errorCode = errorCode;
+ }
+
+ private DataXException(ErrorCode errorCode, String errorMessage, Throwable cause) {
+ super(errorCode.toString() + " - " + getMessage(errorMessage) + " - " + getMessage(cause), cause);
+
+ this.errorCode = errorCode;
+ }
+
+ public static DataXException asDataXException(ErrorCode errorCode, String message) {
+ return new DataXException(errorCode, message);
+ }
+
+ public static DataXException asDataXException(ErrorCode errorCode, String message, Throwable cause) {
+ if (cause instanceof DataXException) {
+ return (DataXException) cause;
+ }
+ return new DataXException(errorCode, message, cause);
+ }
+
+ public static DataXException asDataXException(ErrorCode errorCode, Throwable cause) {
+ if (cause instanceof DataXException) {
+ return (DataXException) cause;
+ }
+ return new DataXException(errorCode, getMessage(cause), cause);
+ }
+
+ public ErrorCode getErrorCode() {
+ return this.errorCode;
+ }
+
+ private static String getMessage(Object obj) {
+ if (obj == null) {
+ return "";
+ }
+
+ if (obj instanceof Throwable) {
+ StringWriter str = new StringWriter();
+ PrintWriter pw = new PrintWriter(str);
+ ((Throwable) obj).printStackTrace(pw);
+ return str.toString();
+ // return ((Throwable) obj).getMessage();
+ } else {
+ return obj.toString();
+ }
+ }
+}
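
The static factories above are how plugins surface failures; note that wrapping an existing DataXException returns it unchanged rather than double-wrapping. A sketch of a typical call site:

```
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;

public class ExceptionDemo {
    public static void main(String[] args) {
        try {
            Integer.parseInt("not-a-number");
        } catch (NumberFormatException e) {
            // Code, description, message and the cause's stack trace all
            // end up in the exception message
            DataXException wrapped = DataXException.asDataXException(
                    CommonErrorCode.RUNTIME_ERROR, "parse failed", e);

            // Re-wrapping a DataXException returns the same instance
            System.out.println(wrapped == DataXException.asDataXException(
                    CommonErrorCode.CONFIG_ERROR, wrapped)); // true
        }
    }
}
```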
diff --git a/common/src/main/java/com/alibaba/datax/common/exception/ExceptionTracker.java b/common/src/main/java/com/alibaba/datax/common/exception/ExceptionTracker.java
new file mode 100644
index 0000000000..f6d3732e2a
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/exception/ExceptionTracker.java
@@ -0,0 +1,15 @@
+package com.alibaba.datax.common.exception;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+public final class ExceptionTracker {
+ public static final int STRING_BUFFER = 1024;
+
+ public static String trace(Throwable ex) {
+ StringWriter sw = new StringWriter(STRING_BUFFER);
+ PrintWriter pw = new PrintWriter(sw);
+ ex.printStackTrace(pw);
+ return sw.toString();
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractJobPlugin.java b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractJobPlugin.java
new file mode 100755
index 0000000000..946adfd0e4
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractJobPlugin.java
@@ -0,0 +1,25 @@
+package com.alibaba.datax.common.plugin;
+
+/**
+ * Created by jingxing on 14-8-24.
+ */
+public abstract class AbstractJobPlugin extends AbstractPlugin {
+ /**
+ * @return the jobPluginCollector
+ */
+ public JobPluginCollector getJobPluginCollector() {
+ return jobPluginCollector;
+ }
+
+ /**
+ * @param jobPluginCollector
+ * the jobPluginCollector to set
+ */
+ public void setJobPluginCollector(
+ JobPluginCollector jobPluginCollector) {
+ this.jobPluginCollector = jobPluginCollector;
+ }
+
+ private JobPluginCollector jobPluginCollector;
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java
new file mode 100755
index 0000000000..184ee89ece
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java
@@ -0,0 +1,87 @@
+package com.alibaba.datax.common.plugin;
+
+import com.alibaba.datax.common.base.BaseObject;
+import com.alibaba.datax.common.util.Configuration;
+
+public abstract class AbstractPlugin extends BaseObject implements Pluginable {
+    // the job's configuration
+ private Configuration pluginJobConf;
+
+    // the plugin's own configuration
+ private Configuration pluginConf;
+
+    // by qiangsi.lq: changed to hold the peer side's job configuration
+ private Configuration peerPluginJobConf;
+
+ private String peerPluginName;
+
+ @Override
+ public String getPluginName() {
+ assert null != this.pluginConf;
+ return this.pluginConf.getString("name");
+ }
+
+ @Override
+ public String getDeveloper() {
+ assert null != this.pluginConf;
+ return this.pluginConf.getString("developer");
+ }
+
+ @Override
+ public String getDescription() {
+ assert null != this.pluginConf;
+ return this.pluginConf.getString("description");
+ }
+
+ @Override
+ public Configuration getPluginJobConf() {
+ return pluginJobConf;
+ }
+
+ @Override
+ public void setPluginJobConf(Configuration pluginJobConf) {
+ this.pluginJobConf = pluginJobConf;
+ }
+
+ @Override
+ public void setPluginConf(Configuration pluginConf) {
+ this.pluginConf = pluginConf;
+ }
+
+ @Override
+ public Configuration getPeerPluginJobConf() {
+ return peerPluginJobConf;
+ }
+
+ @Override
+ public void setPeerPluginJobConf(Configuration peerPluginJobConf) {
+ this.peerPluginJobConf = peerPluginJobConf;
+ }
+
+ @Override
+ public String getPeerPluginName() {
+ return peerPluginName;
+ }
+
+ @Override
+ public void setPeerPluginName(String peerPluginName) {
+ this.peerPluginName = peerPluginName;
+ }
+
+ public void preCheck() {
+ }
+
+ public void prepare() {
+ }
+
+ public void post() {
+ }
+
+ public void preHandler(Configuration jobConfiguration){
+
+ }
+
+ public void postHandler(Configuration jobConfiguration){
+
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractTaskPlugin.java b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractTaskPlugin.java
new file mode 100755
index 0000000000..39fbbe9b52
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractTaskPlugin.java
@@ -0,0 +1,37 @@
+package com.alibaba.datax.common.plugin;
+
+/**
+ * Created by jingxing on 14-8-24.
+ */
+public abstract class AbstractTaskPlugin extends AbstractPlugin {
+
+    // A TaskPlugin must carry its taskId
+ private int taskGroupId;
+ private int taskId;
+ private TaskPluginCollector taskPluginCollector;
+
+ public TaskPluginCollector getTaskPluginCollector() {
+ return taskPluginCollector;
+ }
+
+ public void setTaskPluginCollector(
+ TaskPluginCollector taskPluginCollector) {
+ this.taskPluginCollector = taskPluginCollector;
+ }
+
+ public int getTaskId() {
+ return taskId;
+ }
+
+ public void setTaskId(int taskId) {
+ this.taskId = taskId;
+ }
+
+ public int getTaskGroupId() {
+ return taskGroupId;
+ }
+
+ public void setTaskGroupId(int taskGroupId) {
+ this.taskGroupId = taskGroupId;
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/JobPluginCollector.java b/common/src/main/java/com/alibaba/datax/common/plugin/JobPluginCollector.java
new file mode 100755
index 0000000000..6eb02ab4e7
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/plugin/JobPluginCollector.java
@@ -0,0 +1,22 @@
+package com.alibaba.datax.common.plugin;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Created by jingxing on 14-9-9.
+ */
+public interface JobPluginCollector extends PluginCollector {
+
+    /**
+     * Fetches the custom information collected by all Tasks.
+     */
+    Map<String, List<String>> getMessage();
+
+    /**
+     * Fetches the custom information collected by Tasks under the given key.
+     */
+    List<String> getMessage(String key);
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/PluginCollector.java b/common/src/main/java/com/alibaba/datax/common/plugin/PluginCollector.java
new file mode 100755
index 0000000000..f2af398dd3
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/plugin/PluginCollector.java
@@ -0,0 +1,9 @@
+package com.alibaba.datax.common.plugin;
+
+
+/**
+ * Marker interface only.
+ */
+public interface PluginCollector {
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/Pluginable.java b/common/src/main/java/com/alibaba/datax/common/plugin/Pluginable.java
new file mode 100755
index 0000000000..ac28f6a294
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/plugin/Pluginable.java
@@ -0,0 +1,30 @@
+package com.alibaba.datax.common.plugin;
+
+import com.alibaba.datax.common.util.Configuration;
+
+public interface Pluginable {
+ String getDeveloper();
+
+ String getDescription();
+
+ void setPluginConf(Configuration pluginConf);
+
+ void init();
+
+ void destroy();
+
+ String getPluginName();
+
+ Configuration getPluginJobConf();
+
+ Configuration getPeerPluginJobConf();
+
+ public String getPeerPluginName();
+
+ void setPluginJobConf(Configuration jobConf);
+
+ void setPeerPluginJobConf(Configuration peerPluginJobConf);
+
+ public void setPeerPluginName(String peerPluginName);
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/RecordReceiver.java b/common/src/main/java/com/alibaba/datax/common/plugin/RecordReceiver.java
new file mode 100755
index 0000000000..74f236f371
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/plugin/RecordReceiver.java
@@ -0,0 +1,26 @@
+/**
+ * (C) 2010-2013 Alibaba Group Holding Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.alibaba.datax.common.plugin;
+
+import com.alibaba.datax.common.element.Record;
+
+public interface RecordReceiver {
+
+ public Record getFromReader();
+
+ public void shutdown();
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/RecordSender.java b/common/src/main/java/com/alibaba/datax/common/plugin/RecordSender.java
new file mode 100755
index 0000000000..0d6926098f
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/plugin/RecordSender.java
@@ -0,0 +1,32 @@
+/**
+ * (C) 2010-2013 Alibaba Group Holding Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.alibaba.datax.common.plugin;
+
+import com.alibaba.datax.common.element.Record;
+
+public interface RecordSender {
+
+ public Record createRecord();
+
+ public void sendToWriter(Record record);
+
+ public void flush();
+
+ public void terminate();
+
+ public void shutdown();
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/TaskPluginCollector.java b/common/src/main/java/com/alibaba/datax/common/plugin/TaskPluginCollector.java
new file mode 100755
index 0000000000..f0c85fe6ce
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/plugin/TaskPluginCollector.java
@@ -0,0 +1,57 @@
+package com.alibaba.datax.common.plugin;
+
+import com.alibaba.datax.common.element.Record;
+
+/**
+ * This interface is provided to Task plugins for recording dirty data and custom information.
+ *
+ * 1. Dirty data: TaskPluginCollector adapts to multiple dirty-data sinks, including local
+ *    output, centralized reporting, and so on.
+ * 2. Custom information: every task plugin can collect information through TaskPluginCollector
+ *    while running; the Job plugin reads it during POST via the getMessage() interface.
+ *    (A usage sketch follows this file.)
+ */
+public abstract class TaskPluginCollector implements PluginCollector {
+    /**
+     * Collects a dirty record.
+     *
+     * @param dirtyRecord
+     *            the dirty record
+     * @param t
+     *            the associated exception
+     * @param errorMessage
+     *            the error message to report
+     */
+ public abstract void collectDirtyRecord(final Record dirtyRecord,
+ final Throwable t, final String errorMessage);
+
+    /**
+     * Collects a dirty record.
+     *
+     * @param dirtyRecord
+     *            the dirty record
+     * @param errorMessage
+     *            the error message to report
+     */
+ public void collectDirtyRecord(final Record dirtyRecord,
+ final String errorMessage) {
+ this.collectDirtyRecord(dirtyRecord, null, errorMessage);
+ }
+
+    /**
+     * Collects a dirty record.
+     *
+     * @param dirtyRecord
+     *            the dirty record
+     * @param t
+     *            the associated exception
+     */
+ public void collectDirtyRecord(final Record dirtyRecord, final Throwable t) {
+ this.collectDirtyRecord(dirtyRecord, t, "");
+ }
+
+    /**
+     * Collects custom information; the Job plugin can read it via getMessage().
+     * When the same key receives multiple values, they are recorded internally as a
+     * List under that key.
+     */
+ public abstract void collectMessage(final String key, final String value);
+}
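
A sketch of the intended usage inside a writer task: a record that fails conversion is handed to the collector instead of aborting the task, and summary counters travel back to the Job side via collectMessage. The collector itself is injected by the framework through AbstractTaskPlugin.setTaskPluginCollector; the helper below is purely illustrative:

```
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.plugin.TaskPluginCollector;

public class DirtyRecordDemo {
    public static void writeOne(Record record, TaskPluginCollector collector) {
        try {
            // ... convert and write the record ...
            throw new IllegalStateException("simulated write failure");
        } catch (Exception e) {
            // Routed to the configured dirty-data sink; the task keeps running
            collector.collectDirtyRecord(record, e, "write failed");
            // The Job plugin can read this in post() via JobPluginCollector.getMessage()
            collector.collectMessage("dirtyCount", "1");
        }
    }
}
```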
diff --git a/common/src/main/java/com/alibaba/datax/common/spi/ErrorCode.java b/common/src/main/java/com/alibaba/datax/common/spi/ErrorCode.java
new file mode 100755
index 0000000000..053f99a479
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/spi/ErrorCode.java
@@ -0,0 +1,33 @@
+package com.alibaba.datax.common.spi;
+
+/**
+ * Note in particular: it is best to provide a toString() implementation, for example:
+ *
+ * <pre>
+ * @Override
+ * public String toString() {
+ *     return String.format("Code:[%s], Description:[%s]. ", this.code, this.describe);
+ * }
+ * </pre>
+ */
+public interface ErrorCode {
+    // error code identifier
+ String getCode();
+
+    // error code description
+ String getDescription();
+
+    /**
+     * A toString() implementation must be provided:
+     *
+     * <pre>
+     * @Override
+     * public String toString() {
+     *     return String.format("Code:[%s], Description:[%s]. ", this.code, this.describe);
+     * }
+     * </pre>
+     */
+ String toString();
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/spi/Hook.java b/common/src/main/java/com/alibaba/datax/common/spi/Hook.java
new file mode 100755
index 0000000000..d510f57c18
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/spi/Hook.java
@@ -0,0 +1,27 @@
+package com.alibaba.datax.common.spi;
+
+import com.alibaba.datax.common.util.Configuration;
+
+import java.util.Map;
+
+/**
+ * Created by xiafei.qiuxf on 14/12/17.
+ */
+public interface Hook {
+
+    /**
+     * Returns the hook's name.
+     *
+     * @return the hook name
+     */
+ public String getName();
+
+    /**
+     * TODO: documentation
+     *
+     * @param jobConf
+     * @param msg
+     */
+    public void invoke(Configuration jobConf, Map<String, Object> msg);
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/spi/Reader.java b/common/src/main/java/com/alibaba/datax/common/spi/Reader.java
new file mode 100755
index 0000000000..fec41a9f03
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/spi/Reader.java
@@ -0,0 +1,52 @@
+package com.alibaba.datax.common.spi;
+
+import java.util.List;
+
+import com.alibaba.datax.common.base.BaseObject;
+import com.alibaba.datax.common.plugin.AbstractJobPlugin;
+import com.alibaba.datax.common.plugin.AbstractTaskPlugin;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.plugin.RecordSender;
+
+/**
+ * Every Reader plugin implements two inner classes, Job and Task.
+ */
+public abstract class Reader extends BaseObject {
+
+    /**
+     * Every Reader plugin must implement the Job inner class.
+     */
+ public static abstract class Job extends AbstractJobPlugin {
+
+        /**
+         * Splits the job into tasks.
+         *
+         * @param adviceNumber
+         *
+         *            To emphasize: adviceNumber is the task count the framework advises the
+         *            plugin to split into, and plugin developers should aim to produce a
+         *            task count >= adviceNumber.
+         *
+         *            The advice exists to give users the best throughput. For example, if the
+         *            framework calculates that the user's data store supports 100 concurrent
+         *            connections and the user wants 100-way concurrency, a plugin that splits
+         *            into >= 100 tasks lets DataX start 100 channels at once. Likewise, when a
+         *            user syncs a single MySQL table with an expected concurrency of 10, the
+         *            plugin should split that table, for instance by primary-key range; if the
+         *            final task count reaches >= 10, we can give the user maximum throughput.
+         *
+         *            This is only an advisory value; a Reader plugin may split by its own
+         *            rules, but following the framework's advice is strongly recommended.
+         *            (A minimal sketch follows this file.)
+         *
+         *            For cases such as ODPS writing into OTS where pre-sorting/pre-partitioning
+         *            applies, splitting may only be possible along partition boundaries rather
+         *            than at a finer granularity; such sources can only be split by the
+         *            physical layout of the source.
+         */
+        public abstract List<Configuration> split(int adviceNumber);
+ }
+
+ public static abstract class Task extends AbstractTaskPlugin {
+ public abstract void startRead(RecordSender recordSender);
+ }
+}
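
A minimal Reader sketch under the contract above: Job.split produces one Configuration per task (here simply honoring adviceNumber), and each Task streams records through the RecordSender. Everything below is illustrative, including the shardIndex key; it is not a real plugin:

```
import java.util.ArrayList;
import java.util.List;

import com.alibaba.datax.common.element.LongColumn;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;

public class DemoReader extends Reader {

    public static class Job extends Reader.Job {
        @Override
        public void init() {
        }

        @Override
        public void destroy() {
        }

        @Override
        public List<Configuration> split(int adviceNumber) {
            // One shard per advised task; a real reader would split by
            // primary-key range, partition, file, and so on
            List<Configuration> shards = new ArrayList<Configuration>(adviceNumber);
            for (int i = 0; i < adviceNumber; i++) {
                Configuration shard = getPluginJobConf().clone();
                shard.set("shardIndex", i); // hypothetical key
                shards.add(shard);
            }
            return shards;
        }
    }

    public static class Task extends Reader.Task {
        @Override
        public void init() {
        }

        @Override
        public void destroy() {
        }

        @Override
        public void startRead(RecordSender recordSender) {
            for (long i = 0; i < 3; i++) {
                Record record = recordSender.createRecord();
                record.addColumn(new LongColumn(i));
                recordSender.sendToWriter(record);
            }
            recordSender.flush();
        }
    }
}
```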
diff --git a/common/src/main/java/com/alibaba/datax/common/spi/Writer.java b/common/src/main/java/com/alibaba/datax/common/spi/Writer.java
new file mode 100755
index 0000000000..457eb6860c
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/spi/Writer.java
@@ -0,0 +1,40 @@
+package com.alibaba.datax.common.spi;
+
+import com.alibaba.datax.common.base.BaseObject;
+import com.alibaba.datax.common.plugin.AbstractJobPlugin;
+import com.alibaba.datax.common.plugin.AbstractTaskPlugin;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+
+import java.util.List;
+
+/**
+ * Every Writer plugin extends the Writer class and implements two inner classes, Job and Task.
+ */
+public abstract class Writer extends BaseObject {
+    /**
+     * Every Writer plugin must implement the Job inner class.
+     */
+ public abstract static class Job extends AbstractJobPlugin {
+        /**
+         * Splits the job into tasks.
+         *
+         * @param mandatoryNumber
+         *            To keep Reader and Writer task counts equal, the Writer plugin is
+         *            required to split into exactly the source side's split count; otherwise
+         *            the framework reports an error. (A sketch follows this file.)
+         */
+        public abstract List<Configuration> split(int mandatoryNumber);
+ }
+
+    /**
+     * Every Writer plugin must implement the Task inner class.
+     */
+ public abstract static class Task extends AbstractTaskPlugin {
+
+ public abstract void startWrite(RecordReceiver lineReceiver);
+
+ public boolean supportFailOver(){return false;}
+ }
+}
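
The Writer side of the same contract is simpler: mandatoryNumber is fixed by the reader's split count, so the usual implementation just replicates its own parameter set. An illustrative stand-in for a writer's Job.split body:

```
import java.util.ArrayList;
import java.util.List;

import com.alibaba.datax.common.util.Configuration;

public class WriterSplitDemo {
    // Illustrative stand-in for a writer's Job.split(mandatoryNumber)
    public static List<Configuration> split(Configuration jobConf, int mandatoryNumber) {
        List<Configuration> configs = new ArrayList<Configuration>(mandatoryNumber);
        for (int i = 0; i < mandatoryNumber; i++) {
            // Same parameters for every task; the framework pairs writer
            // task i with reader task i
            configs.add(jobConf.clone());
        }
        return configs;
    }
}
```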
diff --git a/common/src/main/java/com/alibaba/datax/common/statistics/PerfRecord.java b/common/src/main/java/com/alibaba/datax/common/statistics/PerfRecord.java
new file mode 100644
index 0000000000..74b26eeb60
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/statistics/PerfRecord.java
@@ -0,0 +1,258 @@
+package com.alibaba.datax.common.statistics;
+
+import com.alibaba.datax.common.util.HostUtils;
+import org.apache.commons.lang3.time.DateFormatUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Date;
+
+/**
+ * Created by liqiang on 15/8/23.
+ */
+@SuppressWarnings("NullableProblems")
+public class PerfRecord implements Comparable<PerfRecord> {
+ private static Logger perf = LoggerFactory.getLogger(PerfRecord.class);
+ private static String datetimeFormat = "yyyy-MM-dd HH:mm:ss";
+
+
+ public enum PHASE {
+        /**
+         * Total task runtime; the first phases (TASK_TOTAL through WRITE_TASK_DESTROY) are
+         * framework statistics, the later ones are plugin-specific statistics.
+         */
+ TASK_TOTAL(0),
+
+ READ_TASK_INIT(1),
+ READ_TASK_PREPARE(2),
+ READ_TASK_DATA(3),
+ READ_TASK_POST(4),
+ READ_TASK_DESTROY(5),
+
+ WRITE_TASK_INIT(6),
+ WRITE_TASK_PREPARE(7),
+ WRITE_TASK_DATA(8),
+ WRITE_TASK_POST(9),
+ WRITE_TASK_DESTROY(10),
+
+        /**
+         * SQL_QUERY: the SQL query phase; a plugin-specific statistic for some readers.
+         */
+ SQL_QUERY(100),
+        /**
+         * All data fully read out of the SQL result.
+         */
+ RESULT_NEXT_ALL(101),
+
+ /**
+ * only odps block close
+ */
+ ODPS_BLOCK_CLOSE(102),
+
+ WAIT_READ_TIME(103),
+
+ WAIT_WRITE_TIME(104),
+
+ TRANSFORMER_TIME(201);
+
+ private int val;
+
+ PHASE(int val) {
+ this.val = val;
+ }
+
+ public int toInt(){
+ return val;
+ }
+ }
+
+ public enum ACTION{
+ start,
+ end
+ }
+
+ private final int taskGroupId;
+ private final int taskId;
+ private final PHASE phase;
+ private volatile ACTION action;
+ private volatile Date startTime;
+ private volatile long elapsedTimeInNs = -1;
+ private volatile long count = 0;
+ private volatile long size = 0;
+
+ private volatile long startTimeInNs;
+ private volatile boolean isReport = false;
+
+ public PerfRecord(int taskGroupId, int taskId, PHASE phase) {
+ this.taskGroupId = taskGroupId;
+ this.taskId = taskId;
+ this.phase = phase;
+ }
+
+ public static void addPerfRecord(int taskGroupId, int taskId, PHASE phase, long startTime,long elapsedTimeInNs) {
+ if(PerfTrace.getInstance().isEnable()) {
+ PerfRecord perfRecord = new PerfRecord(taskGroupId, taskId, phase);
+ perfRecord.elapsedTimeInNs = elapsedTimeInNs;
+ perfRecord.action = ACTION.end;
+ perfRecord.startTime = new Date(startTime);
+            // register with PerfTrace
+ PerfTrace.getInstance().tracePerfRecord(perfRecord);
+ perf.info(perfRecord.toString());
+ }
+ }
+
+ public void start() {
+ if(PerfTrace.getInstance().isEnable()) {
+ this.startTime = new Date();
+ this.startTimeInNs = System.nanoTime();
+ this.action = ACTION.start;
+            // register with PerfTrace
+ PerfTrace.getInstance().tracePerfRecord(this);
+ perf.info(toString());
+ }
+ }
+
+ public void addCount(long count) {
+ this.count += count;
+ }
+
+ public void addSize(long size) {
+ this.size += size;
+ }
+
+ public void end() {
+ if(PerfTrace.getInstance().isEnable()) {
+ this.elapsedTimeInNs = System.nanoTime() - startTimeInNs;
+ this.action = ACTION.end;
+ PerfTrace.getInstance().tracePerfRecord(this);
+ perf.info(toString());
+ }
+ }
+
+ public void end(long elapsedTimeInNs) {
+ if(PerfTrace.getInstance().isEnable()) {
+ this.elapsedTimeInNs = elapsedTimeInNs;
+ this.action = ACTION.end;
+ PerfTrace.getInstance().tracePerfRecord(this);
+ perf.info(toString());
+ }
+ }
+
+ public String toString() {
+ return String.format("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
+ , getInstId(), taskGroupId, taskId, phase, action,
+ DateFormatUtils.format(startTime, datetimeFormat), elapsedTimeInNs, count, size,getHostIP());
+ }
+
+
+ @Override
+ public int compareTo(PerfRecord o) {
+ if (o == null) {
+ return 1;
+ }
+ return this.elapsedTimeInNs > o.elapsedTimeInNs ? 1 : this.elapsedTimeInNs == o.elapsedTimeInNs ? 0 : -1;
+ }
+
+ @Override
+ public int hashCode() {
+ long jobId = getInstId();
+ int result = (int) (jobId ^ (jobId >>> 32));
+ result = 31 * result + taskGroupId;
+ result = 31 * result + taskId;
+ result = 31 * result + phase.toInt();
+ result = 31 * result + (startTime != null ? startTime.hashCode() : 0);
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if(!(o instanceof PerfRecord)){
+ return false;
+ }
+
+ PerfRecord dst = (PerfRecord)o;
+
+ if (this.getInstId() != dst.getInstId()) return false;
+ if (this.taskGroupId != dst.taskGroupId) return false;
+ if (this.taskId != dst.taskId) return false;
+ if (phase != null ? !phase.equals(dst.phase) : dst.phase != null) return false;
+ if (startTime != null ? !startTime.equals(dst.startTime) : dst.startTime != null) return false;
+ return true;
+ }
+
+ public PerfRecord copy() {
+ PerfRecord copy = new PerfRecord(this.taskGroupId, this.getTaskId(), this.phase);
+ copy.action = this.action;
+ copy.startTime = this.startTime;
+ copy.elapsedTimeInNs = this.elapsedTimeInNs;
+ copy.count = this.count;
+ copy.size = this.size;
+ return copy;
+ }
+ public int getTaskGroupId() {
+ return taskGroupId;
+ }
+
+ public int getTaskId() {
+ return taskId;
+ }
+
+ public PHASE getPhase() {
+ return phase;
+ }
+
+ public ACTION getAction() {
+ return action;
+ }
+
+ public long getElapsedTimeInNs() {
+ return elapsedTimeInNs;
+ }
+
+ public long getCount() {
+ return count;
+ }
+
+ public long getSize() {
+ return size;
+ }
+
+ public long getInstId(){
+ return PerfTrace.getInstance().getInstId();
+ }
+
+ public String getHostIP(){
+ return HostUtils.IP;
+ }
+
+ public String getHostName(){
+ return HostUtils.HOSTNAME;
+ }
+
+ public Date getStartTime() {
+ return startTime;
+ }
+
+ public long getStartTimeInMs() {
+ return startTime.getTime();
+ }
+
+ public long getStartTimeInNs() {
+ return startTimeInNs;
+ }
+
+ public String getDatetime(){
+ if(startTime == null){
+ return "null time";
+ }
+ return DateFormatUtils.format(startTime, datetimeFormat);
+ }
+
+ public boolean isReport() {
+ return isReport;
+ }
+
+ public void setIsReport(boolean isReport) {
+ this.isReport = isReport;
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/statistics/PerfTrace.java b/common/src/main/java/com/alibaba/datax/common/statistics/PerfTrace.java
new file mode 100644
index 0000000000..ea9aa42110
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/statistics/PerfTrace.java
@@ -0,0 +1,907 @@
+package com.alibaba.datax.common.statistics;
+
+import com.alibaba.datax.common.statistics.PerfRecord.PHASE;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.HostUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * PerfTrace records a job (local mode) or a taskGroup (distribute mode). Since each of the two runs in its own JVM, one PerfTrace instance per JVM is enough.
+ */
+
+public class PerfTrace {
+
+ private static Logger LOG = LoggerFactory.getLogger(PerfTrace.class);
+ private static PerfTrace instance;
+ private static final Object lock = new Object();
+ private String perfTraceId;
+ private volatile boolean enable;
+ private volatile boolean isJob;
+ private long instId;
+ private long jobId;
+ private long jobVersion;
+ private int taskGroupId;
+ private int channelNumber;
+
+ private int priority;
+ private int batchSize = 500;
+ private volatile boolean perfReportEnable = true;
+
+ //jobid_jobversion, instanceid, taskid, src_mark, dst_mark
+ private Map<Integer, String> taskDetails = new ConcurrentHashMap<Integer, String>();
+ // PHASE => SumPerfRecord4Print
+ private ConcurrentHashMap<PHASE, SumPerfRecord4Print> perfRecordMaps4print = new ConcurrentHashMap<PHASE, SumPerfRecord4Print>();
+ // job_phase => SumPerf4Report
+ private SumPerf4Report sumPerf4Report = new SumPerf4Report();
+ private SumPerf4Report sumPerf4Report4NotEnd;
+ private Configuration jobInfo;
+ private final Set<PerfRecord> needReportPool4NotEnd = new HashSet<PerfRecord>();
+ private final List<PerfRecord> totalEndReport = new ArrayList<PerfRecord>();
+
+ /**
+ * Singleton accessor.
+ *
+ * @param isJob
+ * @param jobId
+ * @param taskGroupId
+ * @return
+ */
+ public static PerfTrace getInstance(boolean isJob, long jobId, int taskGroupId, int priority, boolean enable) {
+
+ if (instance == null) {
+ synchronized (lock) {
+ if (instance == null) {
+ instance = new PerfTrace(isJob, jobId, taskGroupId, priority, enable);
+ }
+ }
+ }
+ return instance;
+ }
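+ // Editor's note: `instance` is not declared volatile, so this double-checked locking is not
+ // strictly safe under the Java Memory Model; it relies on PerfTrace being initialized once,
+ // up front, before worker threads call getInstance().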
+
+ /**
+ * Since there is only one instance per JVM, once getInstance(isJob, jobId, taskGroupId) has been
+ * called, this convenience accessor returns that instance directly.
+ *
+ * @return
+ */
+ public static PerfTrace getInstance() {
+ if (instance == null) {
+ LOG.error("PerfTrace instance not be init! must have some error! ");
+ synchronized (lock) {
+ if (instance == null) {
+ instance = new PerfTrace(false, -1111, -1111, 0, false);
+ }
+ }
+ }
+ return instance;
+ }
+
+ private PerfTrace(boolean isJob, long jobId, int taskGroupId, int priority, boolean enable) {
+ try {
+ this.perfTraceId = isJob ? "job_" + jobId : String.format("taskGroup_%s_%s", jobId, taskGroupId);
+ this.enable = enable;
+ this.isJob = isJob;
+ this.taskGroupId = taskGroupId;
+ this.instId = jobId;
+ this.priority = priority;
+ LOG.info(String.format("PerfTrace traceId=%s, isEnable=%s, priority=%s", this.perfTraceId, this.enable, this.priority));
+
+ } catch (Exception e) {
+ // do nothing
+ this.enable = false;
+ }
+ }
+
+ public void addTaskDetails(int taskId, String detail) {
+ if (enable) {
+ String before = "";
+ int index = detail.indexOf("?");
+ String current = detail.substring(0, index == -1 ? detail.length() : index);
+ if (current.indexOf("[") >= 0) {
+ current += "]";
+ }
+ if (taskDetails.containsKey(taskId)) {
+ before = taskDetails.get(taskId).trim();
+ }
+ if (StringUtils.isEmpty(before)) {
+ before = "";
+ } else {
+ before += ",";
+ }
+ this.taskDetails.put(taskId, before + current);
+ }
+ }
+
+ public void tracePerfRecord(PerfRecord perfRecord) {
+ try {
+ if (enable) {
+ long curNanoTime = System.nanoTime();
+ // ArrayList is not thread-safe
+ switch (perfRecord.getAction()) {
+ case end:
+ synchronized (totalEndReport) {
+ totalEndReport.add(perfRecord);
+
+ if (totalEndReport.size() > batchSize * 10) {
+ sumPerf4EndPrint(totalEndReport);
+ }
+ }
+
+ if (perfReportEnable && needReport(perfRecord)) {
+ synchronized (needReportPool4NotEnd) {
+ sumPerf4Report.add(curNanoTime,perfRecord);
+ needReportPool4NotEnd.remove(perfRecord);
+ }
+ }
+
+ break;
+ case start:
+ if (perfReportEnable && needReport(perfRecord)) {
+ synchronized (needReportPool4NotEnd) {
+ needReportPool4NotEnd.add(perfRecord);
+ }
+ }
+ break;
+ }
+ }
+ } catch (Exception e) {
+ // do nothing
+ }
+ }
+
+ private boolean needReport(PerfRecord perfRecord) {
+ switch (perfRecord.getPhase()) {
+ case TASK_TOTAL:
+ case SQL_QUERY:
+ case RESULT_NEXT_ALL:
+ case ODPS_BLOCK_CLOSE:
+ return true;
+ }
+ return false;
+ }
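+ // Editor's note: only TASK_TOTAL, SQL_QUERY, RESULT_NEXT_ALL and ODPS_BLOCK_CLOSE feed the
+ // external report (sumPerf4Report); every other phase is kept solely for the printed summary.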
+
+ public String summarizeNoException() {
+ String res;
+ try {
+ res = summarize();
+ } catch (Exception e) {
+ res = "PerfTrace summarize has Exception " + e.getMessage();
+ }
+ return res;
+ }
+
+ // when the job finishes, summarize all collected perf records
+ private synchronized String summarize() {
+ if (!enable) {
+ return "PerfTrace not enable!";
+ }
+
+ if (totalEndReport.size() > 0) {
+ sumPerf4EndPrint(totalEndReport);
+ }
+
+ StringBuilder info = new StringBuilder();
+ info.append("\n === total summarize info === \n");
+ info.append("\n 1. all phase average time info and max time task info: \n\n");
+ info.append(String.format("%-20s | %18s | %18s | %18s | %18s | %-100s\n", "PHASE", "AVERAGE USED TIME", "ALL TASK NUM", "MAX USED TIME", "MAX TASK ID", "MAX TASK INFO"));
+
+ List<PHASE> keys = new ArrayList<PHASE>(perfRecordMaps4print.keySet());
+ Collections.sort(keys, new Comparator<PHASE>() {
+ @Override
+ public int compare(PHASE o1, PHASE o2) {
+ return o1.toInt() - o2.toInt();
+ }
+ });
+ for (PHASE phase : keys) {
+ SumPerfRecord4Print sumPerfRecord = perfRecordMaps4print.get(phase);
+ if (sumPerfRecord == null) {
+ continue;
+ }
+ long averageTime = sumPerfRecord.getAverageTime();
+ long maxTime = sumPerfRecord.getMaxTime();
+ int maxTaskId = sumPerfRecord.maxTaskId;
+ int maxTaskGroupId = sumPerfRecord.getMaxTaskGroupId();
+ info.append(String.format("%-20s | %18s | %18s | %18s | %18s | %-100s\n",
+ phase, unitTime(averageTime), sumPerfRecord.totalCount, unitTime(maxTime), jobId + "-" + maxTaskGroupId + "-" + maxTaskId, taskDetails.get(maxTaskId)));
+ }
+
+ //SumPerfRecord4Print countSumPerf = Optional.fromNullable(perfRecordMaps4print.get(PHASE.READ_TASK_DATA)).or(new SumPerfRecord4Print());
+
+ SumPerfRecord4Print countSumPerf = perfRecordMaps4print.get(PHASE.READ_TASK_DATA);
+ if(countSumPerf == null){
+ countSumPerf = new SumPerfRecord4Print();
+ }
+
+ long averageRecords = countSumPerf.getAverageRecords();
+ long averageBytes = countSumPerf.getAverageBytes();
+ long maxRecord = countSumPerf.getMaxRecord();
+ long maxByte = countSumPerf.getMaxByte();
+ int maxTaskId4Records = countSumPerf.getMaxTaskId4Records();
+ int maxTGID4Records = countSumPerf.getMaxTGID4Records();
+
+ info.append("\n\n 2. record average count and max count task info :\n\n");
+ info.append(String.format("%-20s | %18s | %18s | %18s | %18s | %18s | %-100s\n", "PHASE", "AVERAGE RECORDS", "AVERAGE BYTES", "MAX RECORDS", "MAX RECORD`S BYTES", "MAX TASK ID", "MAX TASK INFO"));
+ if (maxTaskId4Records > -1) {
+ info.append(String.format("%-20s | %18s | %18s | %18s | %18s | %18s | %-100s\n"
+ , PHASE.READ_TASK_DATA, averageRecords, unitSize(averageBytes), maxRecord, unitSize(maxByte), jobId + "-" + maxTGID4Records + "-" + maxTaskId4Records, taskDetails.get(maxTaskId4Records)));
+
+ }
+ return info.toString();
+ }
+
+ // by default the time passed in is in nanoseconds
+ public static String unitTime(long time) {
+ return unitTime(time, TimeUnit.NANOSECONDS);
+ }
+
+ public static String unitTime(long time, TimeUnit timeUnit) {
+ return String.format("%,.3fs", ((float) timeUnit.toNanos(time)) / 1000000000);
+ }
+
+ public static String unitSize(long size) {
+ if (size > 1000000000) {
+ return String.format("%,.2fG", (float) size / 1000000000);
+ } else if (size > 1000000) {
+ return String.format("%,.2fM", (float) size / 1000000);
+ } else if (size > 1000) {
+ return String.format("%,.2fK", (float) size / 1000);
+ } else {
+ return size + "B";
+ }
+ }
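+ // Editor's note: both helpers use decimal (SI) units, not binary ones, e.g.
+ //   unitTime(1500000000L) -> "1.500s"
+ //   unitSize(1536000L)    -> "1.54M"  (1536000 / 1000000, not / 1048576)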
+
+
+ public synchronized ConcurrentHashMap<PHASE, SumPerfRecord4Print> getPerfRecordMaps4print() {
+ if (totalEndReport.size() > 0) {
+ sumPerf4EndPrint(totalEndReport);
+ }
+ return perfRecordMaps4print;
+ }
+
+ public SumPerf4Report getSumPerf4Report() {
+ return sumPerf4Report;
+ }
+
+ public Set<PerfRecord> getNeedReportPool4NotEnd() {
+ return needReportPool4NotEnd;
+ }
+
+ public List<PerfRecord> getTotalEndReport() {
+ return totalEndReport;
+ }
+
+ public Map<Integer, String> getTaskDetails() {
+ return taskDetails;
+ }
+
+ public boolean isEnable() {
+ return enable;
+ }
+
+ public boolean isJob() {
+ return isJob;
+ }
+
+ private String cluster;
+ private String jobDomain;
+ private String srcType;
+ private String dstType;
+ private String srcGuid;
+ private String dstGuid;
+ private Date windowStart;
+ private Date windowEnd;
+ private Date jobStartTime;
+
+ public void setJobInfo(Configuration jobInfo, boolean perfReportEnable, int channelNumber) {
+ try {
+ this.jobInfo = jobInfo;
+ if (jobInfo != null && perfReportEnable) {
+
+ cluster = jobInfo.getString("cluster");
+
+ String srcDomain = jobInfo.getString("srcDomain", "null");
+ String dstDomain = jobInfo.getString("dstDomain", "null");
+ jobDomain = srcDomain + "|" + dstDomain;
+ srcType = jobInfo.getString("srcType");
+ dstType = jobInfo.getString("dstType");
+ srcGuid = jobInfo.getString("srcGuid");
+ dstGuid = jobInfo.getString("dstGuid");
+ windowStart = getWindow(jobInfo.getString("windowStart"), true);
+ windowEnd = getWindow(jobInfo.getString("windowEnd"), false);
+ String jobIdStr = jobInfo.getString("jobId");
+ jobId = StringUtils.isEmpty(jobIdStr) ? (long) -5 : Long.parseLong(jobIdStr);
+ String jobVersionStr = jobInfo.getString("jobVersion");
+ jobVersion = StringUtils.isEmpty(jobVersionStr) ? (long) -4 : Long.parseLong(jobVersionStr);
+ jobStartTime = new Date();
+ }
+ this.perfReportEnable = perfReportEnable;
+ this.channelNumber = channelNumber;
+ } catch (Exception e) {
+ this.perfReportEnable = false;
+ }
+ }
+
+ private Date getWindow(String windowStr, boolean startWindow) {
+ SimpleDateFormat sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd 00:00:00");
+ if (StringUtils.isNotEmpty(windowStr)) {
+ try {
+ return sdf1.parse(windowStr);
+ } catch (ParseException e) {
+ // do nothing
+ }
+ }
+
+ if (startWindow) {
+ try {
+ return sdf2.parse(sdf2.format(new Date()));
+ } catch (ParseException e1) {
+ //do nothing
+ }
+ }
+
+ return null;
+ }
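+ // Editor's note: if windowStart is missing or unparsable, the fallback is today's midnight
+ // (the "yyyy-MM-dd 00:00:00" format round-trip truncates the current time); windowEnd has
+ // no fallback and becomes null.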
+
+ public long getInstId() {
+ return instId;
+ }
+
+ public Configuration getJobInfo() {
+ return jobInfo;
+ }
+
+ public void setBatchSize(int batchSize) {
+ this.batchSize = batchSize;
+ }
+
+ public synchronized JobStatisticsDto2 getReports(String mode) {
+
+ try {
+ if (!enable || !perfReportEnable) {
+ return null;
+ }
+
+ if (("job".equalsIgnoreCase(mode) && !isJob) || "tg".equalsIgnoreCase(mode) && isJob) {
+ return null;
+ }
+
+ // on every report, reset the statistics of tasks that have not finished yet
+ sumPerf4Report4NotEnd = new SumPerf4Report();
+ Set<PerfRecord> needReportPool4NotEndTmp = null;
+ synchronized (needReportPool4NotEnd) {
+ needReportPool4NotEndTmp = new HashSet<PerfRecord>(needReportPool4NotEnd);
+ }
+
+ long curNanoTime = System.nanoTime();
+ for (PerfRecord perfRecord : needReportPool4NotEndTmp) {
+ sumPerf4Report4NotEnd.add(curNanoTime, perfRecord);
+ }
+
+ JobStatisticsDto2 jdo = new JobStatisticsDto2();
+ jdo.setInstId(this.instId);
+ if (isJob) {
+ jdo.setTaskGroupId(-6);
+ } else {
+ jdo.setTaskGroupId(this.taskGroupId);
+ }
+ jdo.setJobId(this.jobId);
+ jdo.setJobVersion(this.jobVersion);
+ jdo.setWindowStart(this.windowStart);
+ jdo.setWindowEnd(this.windowEnd);
+ jdo.setJobStartTime(jobStartTime);
+ jdo.setJobRunTimeMs(System.currentTimeMillis() - jobStartTime.getTime());
+ jdo.setJobPriority(this.priority);
+ jdo.setChannelNum(this.channelNumber);
+ jdo.setCluster(this.cluster);
+ jdo.setJobDomain(this.jobDomain);
+ jdo.setSrcType(this.srcType);
+ jdo.setDstType(this.dstType);
+ jdo.setSrcGuid(this.srcGuid);
+ jdo.setDstGuid(this.dstGuid);
+ jdo.setHostAddress(HostUtils.IP);
+
+ //sum
+ jdo.setTaskTotalTimeMs(sumPerf4Report4NotEnd.totalTaskRunTimeInMs + sumPerf4Report.totalTaskRunTimeInMs);
+ jdo.setOdpsBlockCloseTimeMs(sumPerf4Report4NotEnd.odpsCloseTimeInMs + sumPerf4Report.odpsCloseTimeInMs);
+ jdo.setSqlQueryTimeMs(sumPerf4Report4NotEnd.sqlQueryTimeInMs + sumPerf4Report.sqlQueryTimeInMs);
+ jdo.setResultNextTimeMs(sumPerf4Report4NotEnd.resultNextTimeInMs + sumPerf4Report.resultNextTimeInMs);
+
+ return jdo;
+ } catch (Exception e) {
+ // do nothing
+ }
+
+ return null;
+ }
+
+ private void sumPerf4EndPrint(List<PerfRecord> totalEndReport) {
+ if (!enable || totalEndReport == null) {
+ return;
+ }
+
+ for (PerfRecord perfRecord : totalEndReport) {
+ perfRecordMaps4print.putIfAbsent(perfRecord.getPhase(), new SumPerfRecord4Print());
+ perfRecordMaps4print.get(perfRecord.getPhase()).add(perfRecord);
+ }
+
+ totalEndReport.clear();
+ }
+
+ public void setChannelNumber(int needChannelNumber) {
+ this.channelNumber = needChannelNumber;
+ }
+
+
+ public static class SumPerf4Report {
+ long totalTaskRunTimeInMs = 0L;
+ long odpsCloseTimeInMs = 0L;
+ long sqlQueryTimeInMs = 0L;
+ long resultNextTimeInMs = 0L;
+
+ public void add(long curNanoTime,PerfRecord perfRecord) {
+ try {
+ long runTimeEndInMs;
+ if (perfRecord.getElapsedTimeInNs() == -1) {
+ runTimeEndInMs = (curNanoTime - perfRecord.getStartTimeInNs()) / 1000000;
+ } else {
+ runTimeEndInMs = perfRecord.getElapsedTimeInNs() / 1000000;
+ }
+ switch (perfRecord.getPhase()) {
+ case TASK_TOTAL:
+ totalTaskRunTimeInMs += runTimeEndInMs;
+ break;
+ case SQL_QUERY:
+ sqlQueryTimeInMs += runTimeEndInMs;
+ break;
+ case RESULT_NEXT_ALL:
+ resultNextTimeInMs += runTimeEndInMs;
+ break;
+ case ODPS_BLOCK_CLOSE:
+ odpsCloseTimeInMs += runTimeEndInMs;
+ break;
+ }
+ }catch (Exception e){
+ //do nothing
+ }
+ }
+
+ public long getTotalTaskRunTimeInMs() {
+ return totalTaskRunTimeInMs;
+ }
+
+ public long getOdpsCloseTimeInMs() {
+ return odpsCloseTimeInMs;
+ }
+
+ public long getSqlQueryTimeInMs() {
+ return sqlQueryTimeInMs;
+ }
+
+ public long getResultNextTimeInMs() {
+ return resultNextTimeInMs;
+ }
+ }
+
+ public static class SumPerfRecord4Print {
+ private long perfTimeTotal = 0;
+ private long averageTime = 0;
+ private long maxTime = 0;
+ private int maxTaskId = -1;
+ private int maxTaskGroupId = -1;
+ private int totalCount = 0;
+
+ private long recordsTotal = 0;
+ private long sizesTotal = 0;
+ private long averageRecords = 0;
+ private long averageBytes = 0;
+ private long maxRecord = 0;
+ private long maxByte = 0;
+ private int maxTaskId4Records = -1;
+ private int maxTGID4Records = -1;
+
+ public void add(PerfRecord perfRecord) {
+ if (perfRecord == null) {
+ return;
+ }
+ perfTimeTotal += perfRecord.getElapsedTimeInNs();
+ if (perfRecord.getElapsedTimeInNs() >= maxTime) {
+ maxTime = perfRecord.getElapsedTimeInNs();
+ maxTaskId = perfRecord.getTaskId();
+ maxTaskGroupId = perfRecord.getTaskGroupId();
+ }
+
+ recordsTotal += perfRecord.getCount();
+ sizesTotal += perfRecord.getSize();
+ if (perfRecord.getCount() >= maxRecord) {
+ maxRecord = perfRecord.getCount();
+ maxByte = perfRecord.getSize();
+ maxTaskId4Records = perfRecord.getTaskId();
+ maxTGID4Records = perfRecord.getTaskGroupId();
+ }
+
+ totalCount++;
+ }
+
+ public long getPerfTimeTotal() {
+ return perfTimeTotal;
+ }
+
+ public long getAverageTime() {
+ if (totalCount > 0) {
+ averageTime = perfTimeTotal / totalCount;
+ }
+ return averageTime;
+ }
+
+ public long getMaxTime() {
+ return maxTime;
+ }
+
+ public int getMaxTaskId() {
+ return maxTaskId;
+ }
+
+ public int getMaxTaskGroupId() {
+ return maxTaskGroupId;
+ }
+
+ public long getRecordsTotal() {
+ return recordsTotal;
+ }
+
+ public long getSizesTotal() {
+ return sizesTotal;
+ }
+
+ public long getAverageRecords() {
+ if (totalCount > 0) {
+ averageRecords = recordsTotal / totalCount;
+ }
+ return averageRecords;
+ }
+
+ public long getAverageBytes() {
+ if (totalCount > 0) {
+ averageBytes = sizesTotal / totalCount;
+ }
+ return averageBytes;
+ }
+
+ public long getMaxRecord() {
+ return maxRecord;
+ }
+
+ public long getMaxByte() {
+ return maxByte;
+ }
+
+ public int getMaxTaskId4Records() {
+ return maxTaskId4Records;
+ }
+
+ public int getMaxTGID4Records() {
+ return maxTGID4Records;
+ }
+
+ public int getTotalCount() {
+ return totalCount;
+ }
+ }
+ class JobStatisticsDto2 {
+
+ private Long id;
+ private Date gmtCreate;
+ private Date gmtModified;
+ private Long instId;
+ private Long jobId;
+ private Long jobVersion;
+ private Integer taskGroupId;
+ private Date windowStart;
+ private Date windowEnd;
+ private Date jobStartTime;
+ private Date jobEndTime;
+ private Long jobRunTimeMs;
+ private Integer jobPriority;
+ private Integer channelNum;
+ private String cluster;
+ private String jobDomain;
+ private String srcType;
+ private String dstType;
+ private String srcGuid;
+ private String dstGuid;
+ private Long records;
+ private Long bytes;
+ private Long speedRecord;
+ private Long speedByte;
+ private String stagePercent;
+ private Long errorRecord;
+ private Long errorBytes;
+ private Long waitReadTimeMs;
+ private Long waitWriteTimeMs;
+ private Long odpsBlockCloseTimeMs;
+ private Long sqlQueryTimeMs;
+ private Long resultNextTimeMs;
+ private Long taskTotalTimeMs;
+ private String hostAddress;
+
+ public Long getId() {
+ return id;
+ }
+
+ public Date getGmtCreate() {
+ return gmtCreate;
+ }
+
+ public Date getGmtModified() {
+ return gmtModified;
+ }
+
+ public Long getInstId() {
+ return instId;
+ }
+
+ public Long getJobId() {
+ return jobId;
+ }
+
+ public Long getJobVersion() {
+ return jobVersion;
+ }
+
+ public Integer getTaskGroupId() {
+ return taskGroupId;
+ }
+
+ public Date getWindowStart() {
+ return windowStart;
+ }
+
+ public Date getWindowEnd() {
+ return windowEnd;
+ }
+
+ public Date getJobStartTime() {
+ return jobStartTime;
+ }
+
+ public Date getJobEndTime() {
+ return jobEndTime;
+ }
+
+ public Long getJobRunTimeMs() {
+ return jobRunTimeMs;
+ }
+
+ public Integer getJobPriority() {
+ return jobPriority;
+ }
+
+ public Integer getChannelNum() {
+ return channelNum;
+ }
+
+ public String getCluster() {
+ return cluster;
+ }
+
+ public String getJobDomain() {
+ return jobDomain;
+ }
+
+ public String getSrcType() {
+ return srcType;
+ }
+
+ public String getDstType() {
+ return dstType;
+ }
+
+ public String getSrcGuid() {
+ return srcGuid;
+ }
+
+ public String getDstGuid() {
+ return dstGuid;
+ }
+
+ public Long getRecords() {
+ return records;
+ }
+
+ public Long getBytes() {
+ return bytes;
+ }
+
+ public Long getSpeedRecord() {
+ return speedRecord;
+ }
+
+ public Long getSpeedByte() {
+ return speedByte;
+ }
+
+ public String getStagePercent() {
+ return stagePercent;
+ }
+
+ public Long getErrorRecord() {
+ return errorRecord;
+ }
+
+ public Long getErrorBytes() {
+ return errorBytes;
+ }
+
+ public Long getWaitReadTimeMs() {
+ return waitReadTimeMs;
+ }
+
+ public Long getWaitWriteTimeMs() {
+ return waitWriteTimeMs;
+ }
+
+ public Long getOdpsBlockCloseTimeMs() {
+ return odpsBlockCloseTimeMs;
+ }
+
+ public Long getSqlQueryTimeMs() {
+ return sqlQueryTimeMs;
+ }
+
+ public Long getResultNextTimeMs() {
+ return resultNextTimeMs;
+ }
+
+ public Long getTaskTotalTimeMs() {
+ return taskTotalTimeMs;
+ }
+
+ public String getHostAddress() {
+ return hostAddress;
+ }
+
+ public void setId(Long id) {
+ this.id = id;
+ }
+
+ public void setGmtCreate(Date gmtCreate) {
+ this.gmtCreate = gmtCreate;
+ }
+
+ public void setGmtModified(Date gmtModified) {
+ this.gmtModified = gmtModified;
+ }
+
+ public void setInstId(Long instId) {
+ this.instId = instId;
+ }
+
+ public void setJobId(Long jobId) {
+ this.jobId = jobId;
+ }
+
+ public void setJobVersion(Long jobVersion) {
+ this.jobVersion = jobVersion;
+ }
+
+ public void setTaskGroupId(Integer taskGroupId) {
+ this.taskGroupId = taskGroupId;
+ }
+
+ public void setWindowStart(Date windowStart) {
+ this.windowStart = windowStart;
+ }
+
+ public void setWindowEnd(Date windowEnd) {
+ this.windowEnd = windowEnd;
+ }
+
+ public void setJobStartTime(Date jobStartTime) {
+ this.jobStartTime = jobStartTime;
+ }
+
+ public void setJobEndTime(Date jobEndTime) {
+ this.jobEndTime = jobEndTime;
+ }
+
+ public void setJobRunTimeMs(Long jobRunTimeMs) {
+ this.jobRunTimeMs = jobRunTimeMs;
+ }
+
+ public void setJobPriority(Integer jobPriority) {
+ this.jobPriority = jobPriority;
+ }
+
+ public void setChannelNum(Integer channelNum) {
+ this.channelNum = channelNum;
+ }
+
+ public void setCluster(String cluster) {
+ this.cluster = cluster;
+ }
+
+ public void setJobDomain(String jobDomain) {
+ this.jobDomain = jobDomain;
+ }
+
+ public void setSrcType(String srcType) {
+ this.srcType = srcType;
+ }
+
+ public void setDstType(String dstType) {
+ this.dstType = dstType;
+ }
+
+ public void setSrcGuid(String srcGuid) {
+ this.srcGuid = srcGuid;
+ }
+
+ public void setDstGuid(String dstGuid) {
+ this.dstGuid = dstGuid;
+ }
+
+ public void setRecords(Long records) {
+ this.records = records;
+ }
+
+ public void setBytes(Long bytes) {
+ this.bytes = bytes;
+ }
+
+ public void setSpeedRecord(Long speedRecord) {
+ this.speedRecord = speedRecord;
+ }
+
+ public void setSpeedByte(Long speedByte) {
+ this.speedByte = speedByte;
+ }
+
+ public void setStagePercent(String stagePercent) {
+ this.stagePercent = stagePercent;
+ }
+
+ public void setErrorRecord(Long errorRecord) {
+ this.errorRecord = errorRecord;
+ }
+
+ public void setErrorBytes(Long errorBytes) {
+ this.errorBytes = errorBytes;
+ }
+
+ public void setWaitReadTimeMs(Long waitReadTimeMs) {
+ this.waitReadTimeMs = waitReadTimeMs;
+ }
+
+ public void setWaitWriteTimeMs(Long waitWriteTimeMs) {
+ this.waitWriteTimeMs = waitWriteTimeMs;
+ }
+
+ public void setOdpsBlockCloseTimeMs(Long odpsBlockCloseTimeMs) {
+ this.odpsBlockCloseTimeMs = odpsBlockCloseTimeMs;
+ }
+
+ public void setSqlQueryTimeMs(Long sqlQueryTimeMs) {
+ this.sqlQueryTimeMs = sqlQueryTimeMs;
+ }
+
+ public void setResultNextTimeMs(Long resultNextTimeMs) {
+ this.resultNextTimeMs = resultNextTimeMs;
+ }
+
+ public void setTaskTotalTimeMs(Long taskTotalTimeMs) {
+ this.taskTotalTimeMs = taskTotalTimeMs;
+ }
+
+ public void setHostAddress(String hostAddress) {
+ this.hostAddress = hostAddress;
+ }
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/statistics/VMInfo.java b/common/src/main/java/com/alibaba/datax/common/statistics/VMInfo.java
new file mode 100644
index 0000000000..cab42a4b94
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/statistics/VMInfo.java
@@ -0,0 +1,412 @@
+package com.alibaba.datax.common.statistics;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.management.GarbageCollectorMXBean;
+import java.lang.management.MemoryPoolMXBean;
+import java.lang.management.OperatingSystemMXBean;
+import java.lang.management.RuntimeMXBean;
+import java.lang.reflect.Method;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Created by liqiang on 15/11/12.
+ */
+public class VMInfo {
+ private static final Logger LOG = LoggerFactory.getLogger(VMInfo.class);
+ static final long MB = 1024 * 1024;
+ static final long GB = 1024 * 1024 * 1024;
+ public static final Object lock = new Object();
+ private static VMInfo vmInfo;
+
+ /**
+ * @return the VMInfo singleton, or null if initialization failed; callers can safely ignore a null result.
+ */
+ public static VMInfo getVmInfo() {
+ if (vmInfo == null) {
+ synchronized (lock) {
+ if (vmInfo == null) {
+ try {
+ vmInfo = new VMInfo();
+ } catch (Exception e) {
+ LOG.warn("no need care, the fail is ignored : vmInfo init failed " + e.getMessage(), e);
+ }
+ }
+ }
+
+ }
+ return vmInfo;
+ }
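+ // Editor's sketch (not in the original source) of typical use:
+ //   VMInfo vmInfo = VMInfo.getVmInfo();
+ //   if (vmInfo != null) {
+ //       LOG.info(vmInfo.toString());   // static os/jvm/memory info, once at startup
+ //       vmInfo.getDelta(true);         // then periodically: cpu/gc/memory deltas
+ //   }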
+
+ // MXBeans that supply the data
+ private final OperatingSystemMXBean osMXBean;
+ private final RuntimeMXBean runtimeMXBean;
+ private final List<GarbageCollectorMXBean> garbageCollectorMXBeanList;
+ private final List<MemoryPoolMXBean> memoryPoolMXBeanList;
+ /**
+ * static information
+ */
+ private final String osInfo;
+ private final String jvmInfo;
+
+ /**
+ * number of CPUs
+ */
+ private final int totalProcessorCount;
+
+ /**
+ * machine status snapshots, used for periodic printing and for statistics reporting
+ */
+ private final PhyOSStatus startPhyOSStatus;
+ private final ProcessCpuStatus processCpuStatus = new ProcessCpuStatus();
+ private final ProcessGCStatus processGCStatus = new ProcessGCStatus();
+ private final ProcessMemoryStatus processMemoryStatus = new ProcessMemoryStatus();
+ //ms
+ private long lastUpTime = 0;
+ //nano
+ private long lastProcessCpuTime = 0;
+
+
+ private VMInfo() {
+ // initialize static information
+ osMXBean = java.lang.management.ManagementFactory.getOperatingSystemMXBean();
+ runtimeMXBean = java.lang.management.ManagementFactory.getRuntimeMXBean();
+ garbageCollectorMXBeanList = java.lang.management.ManagementFactory.getGarbageCollectorMXBeans();
+ memoryPoolMXBeanList = java.lang.management.ManagementFactory.getMemoryPoolMXBeans();
+
+ osInfo = osMXBean.getName() + " " + osMXBean.getArch() + " " + osMXBean.getVersion();
+ jvmInfo = runtimeMXBean.getVmVendor() + " " + runtimeMXBean.getSpecVersion() + " " + runtimeMXBean.getVmVersion();
+ totalProcessorCount = osMXBean.getAvailableProcessors();
+
+ // build startPhyOSStatus
+ startPhyOSStatus = new PhyOSStatus();
+ LOG.info("VMInfo# operatingSystem class => " + osMXBean.getClass().getName());
+ if (VMInfo.isSunOsMBean(osMXBean)) {
+ {
+ startPhyOSStatus.totalPhysicalMemory = VMInfo.getLongFromOperatingSystem(osMXBean, "getTotalPhysicalMemorySize");
+ startPhyOSStatus.freePhysicalMemory = VMInfo.getLongFromOperatingSystem(osMXBean, "getFreePhysicalMemorySize");
+ startPhyOSStatus.maxFileDescriptorCount = VMInfo.getLongFromOperatingSystem(osMXBean, "getMaxFileDescriptorCount");
+ startPhyOSStatus.currentOpenFileDescriptorCount = VMInfo.getLongFromOperatingSystem(osMXBean, "getOpenFileDescriptorCount");
+ }
+ }
+
+ // initialize processGCStatus
+ for (GarbageCollectorMXBean garbage : garbageCollectorMXBeanList) {
+ GCStatus gcStatus = new GCStatus();
+ gcStatus.name = garbage.getName();
+ processGCStatus.gcStatusMap.put(garbage.getName(), gcStatus);
+ }
+
+ // initialize processMemoryStatus
+ if (memoryPoolMXBeanList != null && !memoryPoolMXBeanList.isEmpty()) {
+ for (MemoryPoolMXBean pool : memoryPoolMXBeanList) {
+ MemoryStatus memoryStatus = new MemoryStatus();
+ memoryStatus.name = pool.getName();
+ memoryStatus.initSize = pool.getUsage().getInit();
+ memoryStatus.maxSize = pool.getUsage().getMax();
+ processMemoryStatus.memoryStatusMap.put(pool.getName(), memoryStatus);
+ }
+ }
+ }
+
+ public String toString() {
+ return "the machine info => \n\n"
+ + "\tosInfo:\t" + osInfo + "\n"
+ + "\tjvmInfo:\t" + jvmInfo + "\n"
+ + "\tcpu num:\t" + totalProcessorCount + "\n\n"
+ + startPhyOSStatus.toString() + "\n"
+ + processGCStatus.toString() + "\n"
+ + processMemoryStatus.toString() + "\n";
+ }
+
+ public String totalString() {
+ return (processCpuStatus.getTotalString() + processGCStatus.getTotalString());
+ }
+
+ public void getDelta() {
+ getDelta(true);
+ }
+
+ public synchronized void getDelta(boolean print) {
+
+ try {
+ if (VMInfo.isSunOsMBean(osMXBean)) {
+ long curUptime = runtimeMXBean.getUptime();
+ long curProcessTime = getLongFromOperatingSystem(osMXBean, "getProcessCpuTime");
+ // percentages: uptime is in ms while processCpuTime is in ns, so
+ // cpu% = deltaProcessNs / (deltaUptimeMs * 1e6 * cpuCount) * 100
+ //      = deltaProcessNs / (deltaUptimeMs * cpuCount * 10000)
+ if ((curUptime > lastUpTime) && (curProcessTime >= lastProcessCpuTime)) {
+ float curDeltaCpu = (float) (curProcessTime - lastProcessCpuTime) / ((curUptime - lastUpTime) * totalProcessorCount * 10000);
+ processCpuStatus.setMaxMinCpu(curDeltaCpu);
+ processCpuStatus.averageCpu = (float) curProcessTime / (curUptime * totalProcessorCount * 10000);
+
+ lastUpTime = curUptime;
+ lastProcessCpuTime = curProcessTime;
+ }
+ }
+
+ for (GarbageCollectorMXBean garbage : garbageCollectorMXBeanList) {
+
+ GCStatus gcStatus = processGCStatus.gcStatusMap.get(garbage.getName());
+ if (gcStatus == null) {
+ gcStatus = new GCStatus();
+ gcStatus.name = garbage.getName();
+ processGCStatus.gcStatusMap.put(garbage.getName(), gcStatus);
+ }
+
+ long curTotalGcCount = garbage.getCollectionCount();
+ gcStatus.setCurTotalGcCount(curTotalGcCount);
+
+ long curtotalGcTime = garbage.getCollectionTime();
+ gcStatus.setCurTotalGcTime(curtotalGcTime);
+ }
+
+ if (memoryPoolMXBeanList != null && !memoryPoolMXBeanList.isEmpty()) {
+ for (MemoryPoolMXBean pool : memoryPoolMXBeanList) {
+
+ MemoryStatus memoryStatus = processMemoryStatus.memoryStatusMap.get(pool.getName());
+ if (memoryStatus == null) {
+ memoryStatus = new MemoryStatus();
+ memoryStatus.name = pool.getName();
+ processMemoryStatus.memoryStatusMap.put(pool.getName(), memoryStatus);
+ }
+ memoryStatus.committedSize = pool.getUsage().getCommitted();
+ memoryStatus.setMaxMinUsedSize(pool.getUsage().getUsed());
+ long maxMemory = memoryStatus.committedSize > 0 ? memoryStatus.committedSize : memoryStatus.maxSize;
+ memoryStatus.setMaxMinPercent(maxMemory > 0 ? (float) 100 * memoryStatus.usedSize / maxMemory : -1);
+ }
+ }
+
+ if (print) {
+ LOG.info(processCpuStatus.getDeltaString() + processMemoryStatus.getDeltaString() + processGCStatus.getDeltaString());
+ }
+
+ } catch (Exception e) {
+ LOG.warn("no need care, the fail is ignored : vmInfo getDelta failed " + e.getMessage(), e);
+ }
+ }
+
+ public static boolean isSunOsMBean(OperatingSystemMXBean operatingSystem) {
+ final String className = operatingSystem.getClass().getName();
+
+ return "com.sun.management.UnixOperatingSystem".equals(className);
+ }
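+ // Editor's note: only com.sun.management.UnixOperatingSystem matches, so on Windows or
+ // non-HotSpot JVMs the physical-memory and file-descriptor fields above stay at -1 and
+ // the CPU delta in getDelta() is never computed.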
+
+ public static long getLongFromOperatingSystem(OperatingSystemMXBean operatingSystem, String methodName) {
+ try {
+ final Method method = operatingSystem.getClass().getMethod(methodName, (Class<?>[]) null);
+ method.setAccessible(true);
+ return (Long) method.invoke(operatingSystem, (Object[]) null);
+ } catch (final Exception e) {
+ LOG.info(String.format("OperatingSystemMXBean %s failed, Exception = %s ", methodName, e.getMessage()));
+ }
+
+ return -1;
+ }
+
+ private class PhyOSStatus {
+ long totalPhysicalMemory = -1;
+ long freePhysicalMemory = -1;
+ long maxFileDescriptorCount = -1;
+ long currentOpenFileDescriptorCount = -1;
+
+ public String toString() {
+ return String.format("\ttotalPhysicalMemory:\t%,.2fG\n"
+ + "\tfreePhysicalMemory:\t%,.2fG\n"
+ + "\tmaxFileDescriptorCount:\t%s\n"
+ + "\tcurrentOpenFileDescriptorCount:\t%s\n",
+ (float) totalPhysicalMemory / GB, (float) freePhysicalMemory / GB, maxFileDescriptorCount, currentOpenFileDescriptorCount);
+ }
+ }
+
+ private class ProcessCpuStatus {
+ // percentage values, e.g. 30.0 means 30.0%
+ float maxDeltaCpu = -1;
+ float minDeltaCpu = -1;
+ float curDeltaCpu = -1;
+ float averageCpu = -1;
+
+ public void setMaxMinCpu(float curCpu) {
+ this.curDeltaCpu = curCpu;
+ if (maxDeltaCpu < curCpu) {
+ maxDeltaCpu = curCpu;
+ }
+
+ if (minDeltaCpu == -1 || minDeltaCpu > curCpu) {
+ minDeltaCpu = curCpu;
+ }
+ }
+
+ public String getDeltaString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("\n\t [delta cpu info] => \n");
+ sb.append("\t\t");
+ sb.append(String.format("%-30s | %-30s | %-30s | %-30s \n", "curDeltaCpu", "averageCpu", "maxDeltaCpu", "minDeltaCpu"));
+ sb.append("\t\t");
+ sb.append(String.format("%-30s | %-30s | %-30s | %-30s \n",
+ String.format("%,.2f%%", processCpuStatus.curDeltaCpu),
+ String.format("%,.2f%%", processCpuStatus.averageCpu),
+ String.format("%,.2f%%", processCpuStatus.maxDeltaCpu),
+ String.format("%,.2f%%\n", processCpuStatus.minDeltaCpu)));
+
+ return sb.toString();
+ }
+
+ public String getTotalString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("\n\t [total cpu info] => \n");
+ sb.append("\t\t");
+ sb.append(String.format("%-30s | %-30s | %-30s \n", "averageCpu", "maxDeltaCpu", "minDeltaCpu"));
+ sb.append("\t\t");
+ sb.append(String.format("%-30s | %-30s | %-30s \n",
+ String.format("%,.2f%%", processCpuStatus.averageCpu),
+ String.format("%,.2f%%", processCpuStatus.maxDeltaCpu),
+ String.format("%,.2f%%\n", processCpuStatus.minDeltaCpu)));
+
+ return sb.toString();
+ }
+
+ }
+
+ private class ProcessGCStatus {
+ final Map<String, GCStatus> gcStatusMap = new HashMap<String, GCStatus>();
+
+ public String toString() {
+ return "\tGC Names\t" + gcStatusMap.keySet() + "\n";
+ }
+
+ public String getDeltaString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("\n\t [delta gc info] => \n");
+ sb.append("\t\t ");
+ sb.append(String.format("%-20s | %-18s | %-18s | %-18s | %-18s | %-18s | %-18s | %-18s | %-18s \n", "NAME", "curDeltaGCCount", "totalGCCount", "maxDeltaGCCount", "minDeltaGCCount", "curDeltaGCTime", "totalGCTime", "maxDeltaGCTime", "minDeltaGCTime"));
+ for (GCStatus gc : gcStatusMap.values()) {
+ sb.append("\t\t ");
+ sb.append(String.format("%-20s | %-18s | %-18s | %-18s | %-18s | %-18s | %-18s | %-18s | %-18s \n",
+ gc.name, gc.curDeltaGCCount, gc.totalGCCount, gc.maxDeltaGCCount, gc.minDeltaGCCount,
+ String.format("%,.3fs",(float)gc.curDeltaGCTime/1000),
+ String.format("%,.3fs",(float)gc.totalGCTime/1000),
+ String.format("%,.3fs",(float)gc.maxDeltaGCTime/1000),
+ String.format("%,.3fs",(float)gc.minDeltaGCTime/1000)));
+
+ }
+ return sb.toString();
+ }
+
+ public String getTotalString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("\n\t [total gc info] => \n");
+ sb.append("\t\t ");
+ sb.append(String.format("%-20s | %-18s | %-18s | %-18s | %-18s | %-18s | %-18s \n", "NAME", "totalGCCount", "maxDeltaGCCount", "minDeltaGCCount", "totalGCTime", "maxDeltaGCTime", "minDeltaGCTime"));
+ for (GCStatus gc : gcStatusMap.values()) {
+ sb.append("\t\t ");
+ sb.append(String.format("%-20s | %-18s | %-18s | %-18s | %-18s | %-18s | %-18s \n",
+ gc.name, gc.totalGCCount, gc.maxDeltaGCCount, gc.minDeltaGCCount,
+ String.format("%,.3fs",(float)gc.totalGCTime/1000),
+ String.format("%,.3fs",(float)gc.maxDeltaGCTime/1000),
+ String.format("%,.3fs",(float)gc.minDeltaGCTime/1000)));
+
+ }
+ return sb.toString();
+ }
+ }
+
+ private class ProcessMemoryStatus {
+ final Map<String, MemoryStatus> memoryStatusMap = new HashMap<String, MemoryStatus>();
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("\t");
+ sb.append(String.format("%-30s | %-30s | %-30s \n", "MEMORY_NAME", "allocation_size", "init_size"));
+ for (MemoryStatus ms : memoryStatusMap.values()) {
+ sb.append("\t");
+ sb.append(String.format("%-30s | %-30s | %-30s \n",
+ ms.name, String.format("%,.2fMB", (float) ms.maxSize / MB), String.format("%,.2fMB", (float) ms.initSize / MB)));
+ }
+ return sb.toString();
+ }
+
+ public String getDeltaString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("\n\t [delta memory info] => \n");
+ sb.append("\t\t ");
+ sb.append(String.format("%-30s | %-30s | %-30s | %-30s | %-30s \n", "NAME", "used_size", "used_percent", "max_used_size", "max_percent"));
+ for (MemoryStatus ms : memoryStatusMap.values()) {
+ sb.append("\t\t ");
+ sb.append(String.format("%-30s | %-30s | %-30s | %-30s | %-30s \n",
+ ms.name, String.format("%,.2f", (float) ms.usedSize / MB) + "MB",
+ String.format("%,.2f", (float) ms.percent) + "%",
+ String.format("%,.2f", (float) ms.maxUsedSize / MB) + "MB",
+ String.format("%,.2f", (float) ms.maxpercent) + "%"));
+
+ }
+ return sb.toString();
+ }
+ }
+
+ private class GCStatus {
+ String name;
+ long maxDeltaGCCount = -1;
+ long minDeltaGCCount = -1;
+ long curDeltaGCCount;
+ long totalGCCount = 0;
+ long maxDeltaGCTime = -1;
+ long minDeltaGCTime = -1;
+ long curDeltaGCTime;
+ long totalGCTime = 0;
+
+ public void setCurTotalGcCount(long curTotalGcCount) {
+ this.curDeltaGCCount = curTotalGcCount - totalGCCount;
+ this.totalGCCount = curTotalGcCount;
+
+ if (maxDeltaGCCount < curDeltaGCCount) {
+ maxDeltaGCCount = curDeltaGCCount;
+ }
+
+ if (minDeltaGCCount == -1 || minDeltaGCCount > curDeltaGCCount) {
+ minDeltaGCCount = curDeltaGCCount;
+ }
+ }
+
+ public void setCurTotalGcTime(long curTotalGcTime) {
+ this.curDeltaGCTime = curTotalGcTime - totalGCTime;
+ this.totalGCTime = curTotalGcTime;
+
+ if (maxDeltaGCTime < curDeltaGCTime) {
+ maxDeltaGCTime = curDeltaGCTime;
+ }
+
+ if (minDeltaGCTime == -1 || minDeltaGCTime > curDeltaGCTime) {
+ minDeltaGCTime = curDeltaGCTime;
+ }
+ }
+ }
+
+ private class MemoryStatus {
+ String name;
+ long initSize;
+ long maxSize;
+ long committedSize;
+ long usedSize;
+ float percent;
+ long maxUsedSize = -1;
+ float maxPercent = 0;
+
+ void setMaxMinUsedSize(long curUsedSize) {
+ if (maxUsedSize < curUsedSize) {
+ maxUsedSize = curUsedSize;
+ }
+ this.usedSize = curUsedSize;
+ }
+
+ void setMaxMinPercent(float curPercent) {
+ if (maxPercent < curPercent) {
+ maxPercent = curPercent;
+ }
+ this.percent = curPercent;
+ }
+ }
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/util/Configuration.java b/common/src/main/java/com/alibaba/datax/common/util/Configuration.java
new file mode 100755
index 0000000000..f570dd00c2
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/Configuration.java
@@ -0,0 +1,1078 @@
+package com.alibaba.datax.common.util;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.spi.ErrorCode;
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.serializer.SerializerFeature;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.CharUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.builder.ToStringBuilder;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * Configuration provides lossless storage for multi-level JSON configuration.
+ *
+ * Example code:
+ *
+ * Reading a job configuration:
+ * Configuration configuration = Configuration.from(new File("Config.json"));
+ * String jobContainerClass =
+ * configuration.getString("core.container.job.class");
+ *
+ * Setting a multi-level List:
+ * configuration.set("job.reader.parameter.jdbcUrl", Arrays.asList(new String[]
+ * {"jdbc", "jdbc"}));
+ *
+ * Merging Configurations:
+ * configuration.merge(another);
+ *
+ * There are two sound ways to implement Configuration:
+ * the first flattens every key of the JSON, uses the cascaded form a.b.c as the Map key, and stores everything in one internal Map;
+ * the second stores the JSON directly as a structured tree.
+ *
+ * The second approach is used here. The problems with the first are:
+ * 1. Inserting a new object is hard to handle: given a.b.c="bazhen", inserting a="bazhen" means every
+ *    existing entry under the root must be discarded and replaced with the value "bazhen", which is
+ *    awkward to express with flattened string keys.
+ * 2. Returning a subtree, e.g. everything under "a" for a.b.c.d="bazhen", is really a Map that would
+ *    have to be reassembled from the flattened keys.
+ * 3. Emitting JSON requires converting the multi-level Map keys back into a tree first.
+ */
+public class Configuration {
+
+ /**
+ * Encrypted keyPaths are recorded here, so that later, in distributed mode,
+ * their values can be encrypted before being passed on to the DataXServer.
+ */
+ private Set<String> secretKeyPathSet =
+ new HashSet<String>();
+
+ private Object root = null;
+
+ /**
+ * Creates an empty Configuration.
+ */
+ public static Configuration newDefault() {
+ return Configuration.from("{}");
+ }
+
+ /**
+ * Loads a Configuration from a JSON string.
+ */
+ public static Configuration from(String json) {
+ json = StrUtil.replaceVariable(json);
+ checkJSON(json);
+
+ try {
+ return new Configuration(json);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR,
+ e);
+ }
+
+ }
+
+ /**
+ * Loads a Configuration from a File containing JSON.
+ */
+ public static Configuration from(File file) {
+ try {
+ return Configuration.from(IOUtils
+ .toString(new FileInputStream(file)));
+ } catch (FileNotFoundException e) {
+ throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR,
+ String.format("配置信息错误,您提供的配置文件[%s]不存在. 请检查您的配置文件.", file.getAbsolutePath()));
+ } catch (IOException e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONFIG_ERROR,
+ String.format("配置信息错误. 您提供配置文件[%s]读取失败,错误原因: %s. 请检查您的配置文件的权限设置.",
+ file.getAbsolutePath(), e));
+ }
+ }
+
+ /**
+ * Loads a Configuration from an InputStream containing JSON.
+ */
+ public static Configuration from(InputStream is) {
+ try {
+ return Configuration.from(IOUtils.toString(is));
+ } catch (IOException e) {
+ throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR,
+ String.format("请检查您的配置文件. 您提供的配置文件读取失败,错误原因: %s. 请检查您的配置文件的权限设置.", e));
+ }
+ }
+
+ /**
+ * Loads a Configuration from a Map.
+ */
+ public static Configuration from(final Map<String, Object> object) {
+ return Configuration.from(Configuration.toJSONString(object));
+ }
+
+ /**
+ * Loads a Configuration from a List.
+ */
+ public static Configuration from(final List<Object> object) {
+ return Configuration.from(Configuration.toJSONString(object));
+ }
+
+ public String getNecessaryValue(String key, ErrorCode errorCode) {
+ String value = this.getString(key, null);
+ if (StringUtils.isBlank(value)) {
+ throw DataXException.asDataXException(errorCode,
+ String.format("您提供配置文件有误,[%s]是必填参数,不允许为空或者留白 .", key));
+ }
+
+ return value;
+ }
+
+ public String getUnnecessaryValue(String key,String defaultValue,ErrorCode errorCode) {
+ String value = this.getString(key, defaultValue);
+ if (StringUtils.isBlank(value)) {
+ value = defaultValue;
+ }
+ return value;
+ }
+
+ public Boolean getNecessaryBool(String key, ErrorCode errorCode) {
+ Boolean value = this.getBool(key);
+ if (value == null) {
+ throw DataXException.asDataXException(errorCode,
+ String.format("您提供配置文件有误,[%s]是必填参数,不允许为空或者留白 .", key));
+ }
+
+ return value;
+ }
+
+ /**
+ * Resolves the object at the given JSON path.
+ *
+ * NOTE: only Map key and List index addressing are supported. For the JSON
+ *
+ * {"a": {"b": {"c": [0,1,2,3]}}}
+ *
+ * config.get("") returns the whole Map
+ * config.get("a") returns the whole Map under a
+ * config.get("a.b.c") returns the List bound to c
+ * config.get("a.b.c[0]") returns the number 0
+ *
+ * @return the JSON object represented in Java; null if the path or the object does not exist.
+ */
+ public Object get(final String path) {
+ this.checkPath(path);
+ try {
+ return this.findObject(path);
+ } catch (Exception e) {
+ return null;
+ }
+ }
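+ // Editor's sketch (not in the original source) of the addressing rules above:
+ //   Configuration c = Configuration.from("{\"a\": {\"b\": {\"c\": [0,1,2,3]}}}");
+ //   c.get("a.b.c[1]");       // -> 1
+ //   c.getString("a.b.c[1]"); // -> "1" (scalar getters go through String.valueOf)
+ //   c.get("a.b.x");          // -> null: missing paths return null rather than throwing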
+
+ /**
+ * Returns the sub-Configuration under the given partial path.
+ *
+ * Returns null if the path or the addressed object does not exist.
+ */
+ public Configuration getConfiguration(final String path) {
+ Object object = this.get(path);
+ if (null == object) {
+ return null;
+ }
+
+ return Configuration.from(Configuration.toJSONString(object));
+ }
+
+ /**
+ * Resolves the String at the given JSON path.
+ *
+ * @return the String; null if the path or the value does not exist.
+ */
+ public String getString(final String path) {
+ Object string = this.get(path);
+ if (null == string) {
+ return null;
+ }
+ return String.valueOf(string);
+ }
+
+ /**
+ * Resolves the String at the given JSON path, falling back to a default.
+ *
+ * @return the String; the given default if the path or the value does not exist.
+ */
+ public String getString(final String path, final String defaultValue) {
+ String result = this.getString(path);
+
+ if (null == result) {
+ return defaultValue;
+ }
+
+ return result;
+ }
+
+ /**
+ * Resolves the Character at the given JSON path.
+ *
+ * @return the Character; null if the path or the value does not exist.
+ */
+ public Character getChar(final String path) {
+ String result = this.getString(path);
+ if (null == result) {
+ return null;
+ }
+
+ try {
+ return CharUtils.toChar(result);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONFIG_ERROR,
+ String.format("Error reading the configuration: the value at path [%s] is invalid, a character was expected: %s. Please check and fix your configuration.", path,
+ e.getMessage()));
+ }
+ }
+
+ /**
+ * Resolves the Character at the given JSON path, falling back to a default.
+ *
+ * @return the Character; the given default if the path or the value does not exist.
+ */
+ public Character getChar(final String path, char defaultValue) {
+ Character result = this.getChar(path);
+ if (null == result) {
+ return defaultValue;
+ }
+ return result;
+ }
+
+ /**
+ * Resolves the Boolean at the given JSON path.
+ *
+ * @return the Boolean; any value other than true/false raises an error. Note: when the path does not exist, null is returned.
+ */
+ public Boolean getBool(final String path) {
+ String result = this.getString(path);
+
+ if (null == result) {
+ return null;
+ } else if ("true".equalsIgnoreCase(result)) {
+ return Boolean.TRUE;
+ } else if ("false".equalsIgnoreCase(result)) {
+ return Boolean.FALSE;
+ } else {
+ throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR,
+ String.format("Your configuration is invalid: the value read from [%s] is [%s], which cannot be converted to bool. Please check the source table configuration and fix it.",
+ path, result));
+ }
+
+ }
+
+ /**
+ * Resolves the Boolean at the given JSON path, falling back to a default.
+ *
+ * @return the Boolean; the given default if the path or the value does not exist.
+ */
+ public Boolean getBool(final String path, boolean defaultValue) {
+ Boolean result = this.getBool(path);
+ if (null == result) {
+ return defaultValue;
+ }
+ return result;
+ }
+
+ /**
+ * Resolves the Integer at the given JSON path.
+ *
+ * @return the Integer; null if the path or the value does not exist.
+ */
+ public Integer getInt(final String path) {
+ String result = this.getString(path);
+ if (null == result) {
+ return null;
+ }
+
+ try {
+ return Integer.valueOf(result);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONFIG_ERROR,
+ String.format("Error reading the configuration: the value at path [%s] is invalid, an integer was expected: %s. Please check and fix your configuration.", path,
+ e.getMessage()));
+ }
+ }
+
+ /**
+ * Resolves the Integer at the given JSON path, falling back to a default.
+ *
+ * @return the Integer; the given default if the path or the value does not exist.
+ */
+ public Integer getInt(final String path, int defaultValue) {
+ Integer object = this.getInt(path);
+ if (null == object) {
+ return defaultValue;
+ }
+ return object;
+ }
+
+ /**
+ * Resolves the Long at the given JSON path.
+ *
+ * @return the Long; null if the path or the value does not exist.
+ */
+ public Long getLong(final String path) {
+ String result = this.getString(path);
+ if (StringUtils.isBlank(result)) {
+ return null;
+ }
+
+ try {
+ return Long.valueOf(result);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONFIG_ERROR,
+ String.format("Error reading the configuration: the value at path [%s] is invalid, an integer was expected: %s. Please check and fix your configuration.", path,
+ e.getMessage()));
+ }
+ }
+
+ /**
+ * Resolves the Long at the given JSON path, falling back to a default.
+ *
+ * @return the Long; the given default if the path or the value does not exist.
+ */
+ public Long getLong(final String path, long defaultValue) {
+ Long result = this.getLong(path);
+ if (null == result) {
+ return defaultValue;
+ }
+ return result;
+ }
+
+ /**
+ * Resolves the Double at the given JSON path.
+ *
+ * @return the Double; null if the path or the value does not exist.
+ */
+ public Double getDouble(final String path) {
+ String result = this.getString(path);
+ if (StringUtils.isBlank(result)) {
+ return null;
+ }
+
+ try {
+ return Double.valueOf(result);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.CONFIG_ERROR,
+ String.format("Error reading the configuration: the value at path [%s] is invalid, a floating-point number was expected: %s. Please check and fix your configuration.", path,
+ e.getMessage()));
+ }
+ }
+
+ /**
+ * Resolves the Double at the given JSON path, falling back to a default.
+ *
+ * @return the Double; the given default if the path or the value does not exist.
+ */
+ public Double getDouble(final String path, double defaultValue) {
+ Double result = this.getDouble(path);
+ if (null == result) {
+ return defaultValue;
+ }
+ return result;
+ }
+
+ /**
+ * Resolves the List at the given JSON path; returns null if it does not exist.
+ */
+ @SuppressWarnings("unchecked")
+ public List<Object> getList(final String path) {
+ List<Object> list = this.get(path, List.class);
+ if (null == list) {
+ return null;
+ }
+ return list;
+ }
+
+ /**
+ * Resolves the List at the given JSON path, converting each element to the given type; returns null if the path does not exist.
+ */
+ @SuppressWarnings("unchecked")
+ public <T> List<T> getList(final String path, Class<T> t) {
+ Object object = this.get(path, List.class);
+ if (null == object) {
+ return null;
+ }
+
+ List<T> result = new ArrayList<T>();
+
+ List<Object> origin = (List<Object>) object;
+ for (final Object each : origin) {
+ result.add((T) each);
+ }
+
+ return result;
+ }
+
+ /**
+ * Resolves the List at the given JSON path; returns the given default if it does not exist.
+ */
+ @SuppressWarnings("unchecked")
+ public List<Object> getList(final String path,
+ final List<Object> defaultList) {
+ Object object = this.getList(path);
+ if (null == object) {
+ return defaultList;
+ }
+ return (List<Object>) object;
+ }
+
+ /**
+ * Resolves the typed List at the given JSON path; returns the given default if it does not exist.
+ */
+ public <T> List<T> getList(final String path, final List<T> defaultList,
+ Class<T> t) {
+ List<T> list = this.getList(path, t);
+ if (null == list) {
+ return defaultList;
+ }
+ return list;
+ }
+
+ /**
+ * Resolves the List of Configuration at the given JSON path; returns null if it does not exist.
+ */
+ public List<Configuration> getListConfiguration(final String path) {
+ List<Object> lists = getList(path);
+ if (lists == null) {
+ return null;
+ }
+
+ List<Configuration> result = new ArrayList<Configuration>();
+ for (final Object object : lists) {
+ result.add(Configuration.from(Configuration.toJSONString(object)));
+ }
+ return result;
+ }
+
+ /**
+ * Resolves the Map at the given JSON path; returns null if it does not exist.
+ */
+ @SuppressWarnings("unchecked")
+ public Map<String, Object> getMap(final String path) {
+ Map<String, Object> result = this.get(path, Map.class);
+ if (null == result) {
+ return null;
+ }
+ return result;
+ }
+
+ /**
+ * Resolves the Map at the given JSON path, converting each value to the given type; returns null if the path does not exist.
+ */
+ @SuppressWarnings("unchecked")
+ public <T> Map<String, T> getMap(final String path, Class<T> t) {
+ Map<String, Object> map = this.get(path, Map.class);
+ if (null == map) {
+ return null;
+ }
+
+ Map<String, T> result = new HashMap<String, T>();
+ for (final String key : map.keySet()) {
+ result.put(key, (T) map.get(key));
+ }
+
+ return result;
+ }
+
+ /**
+ * Resolves the Map at the given JSON path; returns the given default if it does not exist.
+ */
+ @SuppressWarnings("unchecked")
+ public Map<String, Object> getMap(final String path,
+ final Map<String, Object> defaultMap) {
+ Object object = this.getMap(path);
+ if (null == object) {
+ return defaultMap;
+ }
+ return (Map<String, Object>) object;
+ }
+
+ /**
+ * Resolves the typed Map at the given JSON path; returns the given default if it does not exist.
+ */
+ public <T> Map<String, T> getMap(final String path,
+ final Map<String, T> defaultMap, Class<T> t) {
+ Map<String, T> result = getMap(path, t);
+ if (null == result) {
+ return defaultMap;
+ }
+ return result;
+ }
+
+ /**
+ * Resolves the Map of Configuration at the given JSON path; returns null if it does not exist.
+ */
+ @SuppressWarnings("unchecked")
+ public Map<String, Configuration> getMapConfiguration(final String path) {
+ Map<String, Object> map = this.get(path, Map.class);
+ if (null == map) {
+ return null;
+ }
+
+ Map<String, Configuration> result = new HashMap<String, Configuration>();
+ for (final String key : map.keySet()) {
+ result.put(key, Configuration.from(Configuration.toJSONString(map
+ .get(key))));
+ }
+
+ return result;
+ }
+
+ /**
+ * Resolves the object at the given JSON path and casts it to the requested type.
+ *
+ * NOTE: only Map key and List index addressing are supported; the path syntax is the
+ * same as in get(String).
+ *
+ * @return the JSON object represented in Java; throws if the cast fails.
+ */
+ @SuppressWarnings("unchecked")
+ public <T> T get(final String path, Class<T> clazz) {
+ this.checkPath(path);
+ return (T) this.get(path);
+ }
+
+ /**
+ * Pretty-prints this Configuration as formatted JSON.
+ */
+ public String beautify() {
+ return JSON.toJSONString(this.getInternal(),
+ SerializerFeature.PrettyFormat);
+ }
+
+ /**
+ * Inserts the given object at the given JSON path and returns the object previously
+ * stored there (if any).
+ *
+ * Only "." and array-index addressing are supported, e.g.:
+ *
+ * config.set("a.b.c[3]", object);
+ *
+ * Configuration places no restriction on the inserted object, but make sure it is a simple
+ * object (including Map and List); do not use custom classes, otherwise later JSON
+ * serialization and similar operations have undefined behavior.
+ *
+ * @param path
+ * the JSON path
+ * @param object
+ * the object to insert
+ * @return the previous object at that path, represented as a Java JSON object
+ */
+ public Object set(final String path, final Object object) {
+ checkPath(path);
+
+ Object result = this.get(path);
+
+ setObject(path, extractConfiguration(object));
+
+ return result;
+ }
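+ // Editor's sketch (not in the original source, assuming the usual "."/"[n]" path splitting):
+ // set() creates intermediate nodes on demand, e.g. on an empty Configuration,
+ // set("a.b[1]", 9) yields {"a":{"b":[null,9]}} -- list holes are padded with null by expand().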
+
+ /**
+ * Returns the keys of all leaf nodes under this Configuration.
+ *
+ * For
+ *
+ * {"a": {"b": {"c": [0,1,2,3]}}, "x": "y"}
+ *
+ * the keys are: a.b.c[0], a.b.c[1], a.b.c[2], a.b.c[3], x
+ */
+ public Set<String> getKeys() {
+ Set<String> collect = new HashSet<String>();
+ this.getKeysRecursive(this.getInternal(), "", collect);
+ return collect;
+ }
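+ // Editor's sketch (not in the original source):
+ //   Configuration.from("{\"x\": \"y\", \"a\": [5]}").getKeys()  ->  {"x", "a[0]"}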
+
+ /**
+ * Removes the value at the given path; throws if the path does not exist.
+ */
+ public Object remove(final String path) {
+ final Object result = this.get(path);
+ if (null == result) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.RUNTIME_ERROR,
+ String.format("The key [%s] does not exist in the configuration; this is a programming error. Please contact the DataX team.", path));
+ }
+
+ this.set(path, null);
+ return result;
+ }
+
+ /**
+ * Merges another Configuration into this one, resolving conflicting KV pairs.
+ *
+ * @param another
+ * the Configuration to merge in
+ * @param updateWhenConflict
+ * on a KV conflict, whether to overwrite the current value or ignore the incoming one
+ * @return the merged Configuration (this)
+ */
+ public Configuration merge(final Configuration another,
+ boolean updateWhenConflict) {
+ Set<String> keys = another.getKeys();
+
+ for (final String key : keys) {
+ // update strategy: every key present in another is written
+ if (updateWhenConflict) {
+ this.set(key, another.get(key));
+ continue;
+ }
+
+ // ignore strategy: only keys that exist in another but not in this Configuration are written
+ boolean isCurrentExists = this.get(key) != null;
+ if (isCurrentExists) {
+ continue;
+ }
+
+ this.set(key, another.get(key));
+ }
+ return this;
+ }
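+ // Editor's note: a common pattern is applying defaults without clobbering user values, e.g.
+ //   userConfig.merge(defaultConfig, false);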
+
+ @Override
+ public String toString() {
+ return this.toJSON();
+ }
+
+ /**
+ * Serializes this Configuration as JSON.
+ */
+ public String toJSON() {
+ return Configuration.toJSONString(this.getInternal());
+ }
+
+ /**
+ * Copies this Configuration. Note that a deep copy is used here to avoid shared state.
+ */
+ public Configuration clone() {
+ Configuration config = Configuration
+ .from(Configuration.toJSONString(this.getInternal()));
+ config.addSecretKeyPath(this.secretKeyPathSet);
+ return config;
+ }
+
+ /**
+ * A path in Configuration format, e.g.:
+ * a.b.c
+ * a.b[2].c
+ * @param path
+ */
+ public void addSecretKeyPath(String path) {
+ if(StringUtils.isNotBlank(path)) {
+ this.secretKeyPathSet.add(path);
+ }
+ }
+
+ public void addSecretKeyPath(Set<String> pathSet) {
+ if(pathSet != null) {
+ this.secretKeyPathSet.addAll(pathSet);
+ }
+ }
+
+ public void setSecretKeyPathSet(Set<String> keyPathSet) {
+ if(keyPathSet != null) {
+ this.secretKeyPathSet = keyPathSet;
+ }
+ }
+
+ public boolean isSecretPath(String path) {
+ return this.secretKeyPathSet.contains(path);
+ }
+
+ @SuppressWarnings("unchecked")
+ void getKeysRecursive(final Object current, String path, Set<String> collect) {
+ boolean isRegularElement = !(current instanceof Map || current instanceof List);
+ if (isRegularElement) {
+ collect.add(path);
+ return;
+ }
+
+ boolean isMap = current instanceof Map;
+ if (isMap) {
+ Map<String, Object> mapping = ((Map<String, Object>) current);
+ for (final String key : mapping.keySet()) {
+ if (StringUtils.isBlank(path)) {
+ getKeysRecursive(mapping.get(key), key.trim(), collect);
+ } else {
+ getKeysRecursive(mapping.get(key), path + "." + key.trim(),
+ collect);
+ }
+ }
+ return;
+ }
+
+ boolean isList = current instanceof List;
+ if (isList) {
+ List<Object> lists = (List<Object>) current;
+ for (int i = 0; i < lists.size(); i++) {
+ getKeysRecursive(lists.get(i), path + String.format("[%d]", i),
+ collect);
+ }
+ return;
+ }
+
+ return;
+ }
+
+ public Object getInternal() {
+ return this.root;
+ }
+
+ private void setObject(final String path, final Object object) {
+ Object newRoot = setObjectRecursive(this.root, split2List(path), 0,
+ object);
+
+ if (isSuitForRoot(newRoot)) {
+ this.root = newRoot;
+ return;
+ }
+
+ throw DataXException.asDataXException(CommonErrorCode.RUNTIME_ERROR,
+ String.format("The value [%s] cannot be set at the path [%s] you provided; this is a programming error. Please contact the DataX team!",
+ ToStringBuilder.reflectionToString(object), path));
+ }
+
+ @SuppressWarnings("unchecked")
+ private Object extractConfiguration(final Object object) {
+ if (object instanceof Configuration) {
+ return extractFromConfiguration(object);
+ }
+
+ if (object instanceof List) {
+ List<Object> result = new ArrayList<Object>();
+ for (final Object each : (List<Object>) object) {
+ result.add(extractFromConfiguration(each));
+ }
+ return result;
+ }
+
+ if (object instanceof Map) {
+ Map<String, Object> result = new HashMap<String, Object>();
+ for (final String key : ((Map<String, Object>) object).keySet()) {
+ result.put(key,
+ extractFromConfiguration(((Map<String, Object>) object)
+ .get(key)));
+ }
+ return result;
+ }
+
+ return object;
+ }
+
+ private Object extractFromConfiguration(final Object object) {
+ if (object instanceof Configuration) {
+ return ((Configuration) object).getInternal();
+ }
+
+ return object;
+ }
+
+ Object buildObject(final List<String> paths, final Object object) {
+ if (null == paths) {
+ throw DataXException.asDataXException(
+ CommonErrorCode.RUNTIME_ERROR,
+ "Path must not be null; this is a programming error. Please contact the DataX team!");
+ }
+
+ if (1 == paths.size() && StringUtils.isBlank(paths.get(0))) {
+ return object;
+ }
+
+ Object child = object;
+ for (int i = paths.size() - 1; i >= 0; i--) {
+ String path = paths.get(i);
+
+ if (isPathMap(path)) {
+ Map<String, Object> mapping = new HashMap<String, Object>();
+ mapping.put(path, child);
+ child = mapping;
+ continue;
+ }
+
+ if (isPathList(path)) {
+ List<Object> lists = new ArrayList<Object>(
+ this.getIndex(path) + 1);
+ expand(lists, this.getIndex(path) + 1);
+ lists.set(this.getIndex(path), child);
+ child = lists;
+ continue;
+ }
+
+ throw DataXException.asDataXException(
+ CommonErrorCode.RUNTIME_ERROR, String.format(
+ "Illegal value type at path [%s], segment [%s]; this is a programming error. Please contact the DataX team!",
+ StringUtils.join(paths, "."), path));
+ }
+
+ return child;
+ }
+
+ @SuppressWarnings("unchecked")
+ Object setObjectRecursive(Object current, final List paths,
+ int index, final Object value) {
+
+ // 如果是已经超出path,我们就返回value即可,作为最底层叶子节点
+ boolean isLastIndex = index == paths.size();
+ if (isLastIndex) {
+ return value;
+ }
+
+ String path = paths.get(index).trim();
+ boolean isNeedMap = isPathMap(path);
+ if (isNeedMap) {
+ Map<String, Object> mapping;
+
+ // The current node is not a map, so replace it entirely with a new map and return that map.
+ boolean isCurrentMap = current instanceof Map;
+ if (!isCurrentMap) {
+ mapping = new HashMap<String, Object>();
+ mapping.put(
+ path,
+ buildObject(paths.subList(index + 1, paths.size()),
+ value));
+ return mapping;
+ }
+
+ // The current node is a map but lacks this key: build the object, insert it, and return the map.
+ mapping = ((Map<String, Object>) current);
+ boolean hasSameKey = mapping.containsKey(path);
+ if (!hasSameKey) {
+ mapping.put(
+ path,
+ buildObject(paths.subList(index + 1, paths.size()),
+ value));
+ return mapping;
+ }
+
+ // The current node is a map and the key already holds a value, so keep recursing into it.
+ current = mapping.get(path);
+ mapping.put(path,
+ setObjectRecursive(current, paths, index + 1, value));
+ return mapping;
+ }
+
+ boolean isNeedList = isPathList(path);
+ if (isNeedList) {
+ List<Object> lists;
+ int listIndexer = getIndex(path);
+
+ // The current node is not a list, so build a new one and return it.
+ boolean isCurrentList = current instanceof List;
+ if (!isCurrentList) {
+ lists = expand(new ArrayList<Object>(), listIndexer + 1);
+ lists.set(
+ listIndexer,
+ buildObject(paths.subList(index + 1, paths.size()),
+ value));
+ return lists;
+ }
+
+ // The current node is a list but this index holds no value: build the object, set it, and return the list.
+ lists = (List<Object>) current;
+ lists = expand(lists, listIndexer + 1);
+
+ boolean hasSameIndex = lists.get(listIndexer) != null;
+ if (!hasSameIndex) {
+ lists.set(
+ listIndexer,
+ buildObject(paths.subList(index + 1, paths.size()),
+ value));
+ return lists;
+ }
+
+ // The current node is a list and the index already holds a value, so keep recursing into it.
+ current = lists.get(listIndexer);
+ lists.set(listIndexer,
+ setObjectRecursive(current, paths, index + 1, value));
+ return lists;
+ }
+
+ throw DataXException.asDataXException(CommonErrorCode.RUNTIME_ERROR,
+ "该异常代表系统编程错误, 请联系DataX开发团队 !");
+ }
+
+ private Object findObject(final String path) {
+ boolean isRootQuery = StringUtils.isBlank(path);
+ if (isRootQuery) {
+ return this.root;
+ }
+
+ Object target = this.root;
+
+ for (final String each : split2List(path)) {
+ if (isPathMap(each)) {
+ target = findObjectInMap(target, each);
+ continue;
+ } else {
+ target = findObjectInList(target, each);
+ continue;
+ }
+ }
+
+ return target;
+ }
+
+ @SuppressWarnings("unchecked")
+ private Object findObjectInMap(final Object target, final String index) {
+ boolean isMap = (target instanceof Map);
+ if (!isMap) {
+ throw new IllegalArgumentException(String.format(
+ "您提供的配置文件有误. 路径[%s]需要配置Json格式的Map对象,但该节点发现实际类型是[%s]. 请检查您的配置并作出修改.",
+ index, target.getClass().toString()));
+ }
+
+ Object result = ((Map<String, Object>) target).get(index);
+ if (null == result) {
+ throw new IllegalArgumentException(String.format(
+ "您提供的配置文件有误. 路径[%s]值为null,datax无法识别该配置. 请检查您的配置并作出修改.", index));
+ }
+
+ return result;
+ }
+
+ @SuppressWarnings({ "unchecked" })
+ private Object findObjectInList(final Object target, final String each) {
+ boolean isList = (target instanceof List);
+ if (!isList) {
+ throw new IllegalArgumentException(String.format(
+ "您提供的配置文件有误. 路径[%s]需要配置Json格式的Map对象,但该节点发现实际类型是[%s]. 请检查您的配置并作出修改.",
+ each, target.getClass().toString()));
+ }
+
+ String index = each.replace("[", "").replace("]", "");
+ if (!StringUtils.isNumeric(index)) {
+ throw new IllegalArgumentException(
+ String.format(
+ "系统编程错误,列表下标必须为数字类型,但该节点发现实际类型是[%s] ,该异常代表系统编程错误, 请联系DataX开发团队 !",
+ index));
+ }
+
+ return ((List<Object>) target).get(Integer.valueOf(index));
+ }
+
+ private List<Object> expand(List<Object> list, int size) {
+ int expand = size - list.size();
+ while (expand-- > 0) {
+ list.add(null);
+ }
+ return list;
+ }
+
+ private boolean isPathList(final String path) {
+ return path.contains("[") && path.contains("]");
+ }
+
+ private boolean isPathMap(final String path) {
+ return StringUtils.isNotBlank(path) && !isPathList(path);
+ }
+
+ private int getIndex(final String index) {
+ return Integer.valueOf(index.replace("[", "").replace("]", ""));
+ }
+
+ private boolean isSuitForRoot(final Object object) {
+ if (null != object && (object instanceof List || object instanceof Map)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ private String split(final String path) {
+ return StringUtils.replace(path, "[", ".[");
+ }
+
+ private List<String> split2List(final String path) {
+ return Arrays.asList(StringUtils.split(split(path), "."));
+ }
+
+ private void checkPath(final String path) {
+ if (null == path) {
+ throw new IllegalArgumentException(
+ "系统编程错误, 该异常代表系统编程错误, 请联系DataX开发团队!.");
+ }
+
+ for (final String each : StringUtils.split(path, ".")) {
+ if (StringUtils.isBlank(each)) {
+ throw new IllegalArgumentException(String.format(
+ "系统编程错误, 路径[%s]不合法, 路径层次之间不能出现空白字符 .", path));
+ }
+ }
+ }
+
+ @SuppressWarnings("unused")
+ private String toJSONPath(final String path) {
+ return (StringUtils.isBlank(path) ? "$" : "$." + path).replace("$.[",
+ "$[");
+ }
+
+ private static void checkJSON(final String json) {
+ if (StringUtils.isBlank(json)) {
+ throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR,
+ "配置信息错误. 因为您提供的配置信息不是合法的JSON格式, JSON不能为空白. 请按照标准json格式提供配置信息. ");
+ }
+ }
+
+ private Configuration(final String json) {
+ try {
+ this.root = JSON.parse(json);
+ } catch (Exception e) {
+ throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR,
+ String.format("配置信息错误. 您提供的配置信息不是合法的JSON格式: %s . 请按照标准json格式提供配置信息. ", e.getMessage()));
+ }
+ }
+
+ private static String toJSONString(final Object object) {
+ return JSON.toJSONString(object);
+ }
+
+ public Set<String> getSecretKeyPathSet() {
+ return secretKeyPathSet;
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/util/FilterUtil.java b/common/src/main/java/com/alibaba/datax/common/util/FilterUtil.java
new file mode 100755
index 0000000000..37b319a194
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/FilterUtil.java
@@ -0,0 +1,52 @@
+package com.alibaba.datax.common.util;
+
+import java.util.*;
+import java.util.regex.Pattern;
+
+/**
+ * Generic helper for filtering a List of strings by a regular expression (the
+ * result is de-duplicated). Typical uses: partition filtering in odpsreader,
+ * path filtering in hdfsreader/txtfilereader, and the like.
+ */
+public final class FilterUtil {
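+ // Usage sketch (hypothetical values): the glob-ish "pt=*" is first expanded
+ // to the regex "pt=.*" inside filterByRegular, so
+ //   FilterUtil.filterByRegular(Arrays.asList("pt=20150101", "pt=20150102", "ds=a"), "pt=*")
+ // returns ["pt=20150101", "pt=20150102"].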
+
+ // the result is de-duplicated
+ public static List<String> filterByRegular(List<String> allStrs,
+ String regular) {
+ List<String> matchedValues = new ArrayList<String>();
+
+ // Compatibility with glob-style habits: pt=* is normalized to the regex pt=.*
+ String newRegular = regular.replace(".*", "*").replace("*", ".*");
+
+ Pattern p = Pattern.compile(newRegular);
+
+ for (String partition : allStrs) {
+ if (p.matcher(partition).matches()) {
+ if (!matchedValues.contains(partition)) {
+ matchedValues.add(partition);
+ }
+ }
+ }
+
+ return matchedValues;
+ }
+
+ // the result is de-duplicated
+ public static List<String> filterByRegulars(List<String> allStrs,
+ List<String> regulars) {
+ List<String> matchedValues = new ArrayList<String>();
+
+ List<String> tempMatched = null;
+ for (String regular : regulars) {
+ tempMatched = filterByRegular(allStrs, regular);
+ if (null != tempMatched && !tempMatched.isEmpty()) {
+ for (String temp : tempMatched) {
+ if (!matchedValues.contains(temp)) {
+ matchedValues.add(temp);
+ }
+ }
+ }
+ }
+
+ return matchedValues;
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/util/HostUtils.java b/common/src/main/java/com/alibaba/datax/common/util/HostUtils.java
new file mode 100644
index 0000000000..2ed8f1019c
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/HostUtils.java
@@ -0,0 +1,49 @@
+package com.alibaba.datax.common.util;
+
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+/**
+ * Created by liqiang on 15/8/25.
+ */
+public class HostUtils {
+
+ public static final String IP;
+ public static final String HOSTNAME;
+ private static final Logger log = LoggerFactory.getLogger(HostUtils.class);
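+ // Resolution order: InetAddress.getLocalHost() first; if that only yields a
+ // loopback or unknown address, fall back to the `hostname -i` / `hostname`
+ // commands (a Unix-only fallback; elsewhere the loopback value is kept).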
+
+ static {
+ String ip;
+ String hostname;
+ try {
+ InetAddress addr = InetAddress.getLocalHost();
+ ip = addr.getHostAddress();
+ hostname = addr.getHostName();
+ } catch (UnknownHostException e) {
+ log.error("Can't find out address: " + e.getMessage());
+ ip = "UNKNOWN";
+ hostname = "UNKNOWN";
+ }
+ if (ip.equals("127.0.0.1") || ip.equals("::1") || ip.equals("UNKNOWN")) {
+ try {
+ Process process = Runtime.getRuntime().exec("hostname -i");
+ if (process.waitFor() == 0) {
+ ip = new String(IOUtils.toByteArray(process.getInputStream()), "UTF8").trim();
+ }
+ process = Runtime.getRuntime().exec("hostname");
+ if (process.waitFor() == 0) {
+ hostname = (new String(IOUtils.toByteArray(process.getInputStream()), "UTF8")).trim();
+ }
+ } catch (Exception e) {
+ log.warn("get hostname failed {}", e.getMessage());
+ }
+ }
+ IP = ip;
+ HOSTNAME = hostname;
+ log.info("IP {} HOSTNAME {}", IP, HOSTNAME);
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java b/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java
new file mode 100755
index 0000000000..d7a5b76462
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java
@@ -0,0 +1,139 @@
+package com.alibaba.datax.common.util;
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Common helpers for Lists as used inside DataX. For example, checkIfValueDuplicate
+ * can verify that the writer columns a user configured contain no duplicates;
+ * makeSureNoValueDuplicate does the same but fails loudly with an exception.
+ */
+public final class ListUtil {
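+ // Usage sketch (hypothetical column list):
+ //   ListUtil.makeSureNoValueDuplicate(Arrays.asList("id", "name", "ID"), false);
+ // throws, because "id" and "ID" collide once compared case-insensitively.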
+
+ public static boolean checkIfValueDuplicate(List<String> aList,
+ boolean caseSensitive) {
+ if (null == aList || aList.isEmpty()) {
+ throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR,
+ "您提供的作业配置有误,List不能为空.");
+ }
+
+ try {
+ makeSureNoValueDuplicate(aList, caseSensitive);
+ } catch (Exception e) {
+ return true;
+ }
+ return false;
+ }
+
+ public static void makeSureNoValueDuplicate(List<String> aList,
+ boolean caseSensitive) {
+ if (null == aList || aList.isEmpty()) {
+ throw new IllegalArgumentException("您提供的作业配置有误, List不能为空.");
+ }
+
+ if (1 == aList.size()) {
+ return;
+ } else {
+ List<String> list = null;
+ if (!caseSensitive) {
+ list = valueToLowerCase(aList);
+ } else {
+ list = new ArrayList<String>(aList);
+ }
+
+ Collections.sort(list);
+
+ for (int i = 0, len = list.size() - 1; i < len; i++) {
+ if (list.get(i).equals(list.get(i + 1))) {
+ throw DataXException
+ .asDataXException(
+ CommonErrorCode.CONFIG_ERROR,
+ String.format(
+ "您提供的作业配置信息有误, String:[%s] 不允许重复出现在列表中: [%s].",
+ list.get(i),
+ StringUtils.join(aList, ",")));
+ }
+ }
+ }
+ }
+
+ public static boolean checkIfBInA(List<String> aList, List<String> bList,
+ boolean caseSensitive) {
+ if (null == aList || aList.isEmpty() || null == bList
+ || bList.isEmpty()) {
+ throw new IllegalArgumentException("您提供的作业配置有误, List不能为空.");
+ }
+
+ try {
+ makeSureBInA(aList, bList, caseSensitive);
+ } catch (Exception e) {
+ return false;
+ }
+ return true;
+ }
+
+ public static void makeSureBInA(List<String> aList, List<String> bList,
+ boolean caseSensitive) {
+ if (null == aList || aList.isEmpty() || null == bList
+ || bList.isEmpty()) {
+ throw new IllegalArgumentException("您提供的作业配置有误, List不能为空.");
+ }
+
+ List<String> all = null;
+ List<String> part = null;
+
+ if (!caseSensitive) {
+ all = valueToLowerCase(aList);
+ part = valueToLowerCase(bList);
+ } else {
+ all = new ArrayList<String>(aList);
+ part = new ArrayList<String>(bList);
+ }
+
+ for (String oneValue : part) {
+ if (!all.contains(oneValue)) {
+ throw DataXException
+ .asDataXException(
+ CommonErrorCode.CONFIG_ERROR,
+ String.format(
+ "您提供的作业配置信息有误, String:[%s] 不存在于列表中:[%s].",
+ oneValue, StringUtils.join(aList, ",")));
+ }
+ }
+
+ }
+
+ public static boolean checkIfValueSame(List<Boolean> aList) {
+ if (null == aList || aList.isEmpty()) {
+ throw new IllegalArgumentException("您提供的作业配置有误, List不能为空.");
+ }
+
+ if (1 == aList.size()) {
+ return true;
+ } else {
+ Boolean firstValue = aList.get(0);
+ for (int i = 1, len = aList.size(); i < len; i++) {
+ if (firstValue.booleanValue() != aList.get(i).booleanValue()) {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+
+ public static List<String> valueToLowerCase(List<String> aList) {
+ if (null == aList || aList.isEmpty()) {
+ throw new IllegalArgumentException("您提供的作业配置有误, List不能为空.");
+ }
+ List<String> result = new ArrayList<String>(aList.size());
+ for (String oneValue : aList) {
+ result.add(null != oneValue ? oneValue.toLowerCase() : null);
+ }
+
+ return result;
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java b/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java
new file mode 100755
index 0000000000..791f9ea12c
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java
@@ -0,0 +1,209 @@
+package com.alibaba.datax.common.util;
+
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+
+import java.math.BigInteger;
+import java.util.*;
+
+/**
+ * Generic helpers for splitting numeric ranges and (ASCII) string ranges into slices.
+ */
+public final class RangeSplitUtil {
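+ // e.g. doLongSplit(0, 10, 3) => [0, 4, 7, 10]: three sub-ranges [0,4], [4,7],
+ // [7,10] whose sizes differ by at most one; string ranges split analogously.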
+
+ public static String[] doAsciiStringSplit(String left, String right, int expectSliceNumber) {
+ int radix = 128;
+
+ BigInteger[] tempResult = doBigIntegerSplit(stringToBigInteger(left, radix),
+ stringToBigInteger(right, radix), expectSliceNumber);
+ String[] result = new String[tempResult.length];
+
+ // Pin the endpoints to the original strings: the string->number->string round trip
+ // cannot tell how many leading zero-value characters it should restore.
+ result[0] = left;
+ result[tempResult.length - 1] = right;
+
+ for (int i = 1, len = tempResult.length - 1; i < len; i++) {
+ result[i] = bigIntegerToString(tempResult[i], radix);
+ }
+
+ return result;
+ }
+
+
+ public static long[] doLongSplit(long left, long right, int expectSliceNumber) {
+ BigInteger[] result = doBigIntegerSplit(BigInteger.valueOf(left),
+ BigInteger.valueOf(right), expectSliceNumber);
+ long[] returnResult = new long[result.length];
+ for (int i = 0, len = result.length; i < len; i++) {
+ returnResult[i] = result[i].longValue();
+ }
+ return returnResult;
+ }
+
+ public static BigInteger[] doBigIntegerSplit(BigInteger left, BigInteger right, int expectSliceNumber) {
+ if (expectSliceNumber < 1) {
+ throw new IllegalArgumentException(String.format(
+ "切分份数不能小于1. 此处:expectSliceNumber=[%s].", expectSliceNumber));
+ }
+
+ if (null == left || null == right) {
+ throw new IllegalArgumentException(String.format(
+ "对 BigInteger 进行切分时,其左右区间不能为 null. 此处:left=[%s],right=[%s].", left, right));
+ }
+
+ if (left.compareTo(right) == 0) {
+ return new BigInteger[]{left, right};
+ } else {
+ // Swap if needed so that left < right.
+ if (left.compareTo(right) > 0) {
+ BigInteger temp = left;
+ left = right;
+ right = temp;
+ }
+
+ //left < right
+ BigInteger endAndStartGap = right.subtract(left);
+
+ BigInteger step = endAndStartGap.divide(BigInteger.valueOf(expectSliceNumber));
+ BigInteger remainder = endAndStartGap.remainder(BigInteger.valueOf(expectSliceNumber));
+
+ // remainder can never exceed expectSliceNumber, so it safely fits in an int.
+
+ // Do not test step.intValue() == 0 here: intValue() can overflow for large steps.
+ if (step.compareTo(BigInteger.ZERO) == 0) {
+ expectSliceNumber = remainder.intValue();
+ }
+
+ BigInteger[] result = new BigInteger[expectSliceNumber + 1];
+ result[0] = left;
+ result[expectSliceNumber] = right;
+
+ BigInteger lowerBound;
+ BigInteger upperBound = left;
+ for (int i = 1; i < expectSliceNumber; i++) {
+ lowerBound = upperBound;
+ upperBound = lowerBound.add(step);
+ upperBound = upperBound.add((remainder.compareTo(BigInteger.valueOf(i)) >= 0)
+ ? BigInteger.ONE : BigInteger.ZERO);
+ result[i] = upperBound;
+ }
+
+ return result;
+ }
+ }
+
+ private static void checkIfBetweenRange(int value, int left, int right) {
+ if (value < left || value > right) {
+ throw new IllegalArgumentException(String.format("parameter can not <[%s] or >[%s].",
+ left, right));
+ }
+ }
+
+ /**
+ * Only ASCII characters are supported, hence radix must lie in [1,128].
+ */
+ public static BigInteger stringToBigInteger(String aString, int radix) {
+ if (null == aString) {
+ throw new IllegalArgumentException("参数 bigInteger 不能为空.");
+ }
+
+ checkIfBetweenRange(radix, 1, 128);
+
+ BigInteger result = BigInteger.ZERO;
+ BigInteger radixBigInteger = BigInteger.valueOf(radix);
+
+ int tempChar;
+ int k = 0;
+
+ for (int i = aString.length() - 1; i >= 0; i--) {
+ tempChar = aString.charAt(i);
+ if (tempChar >= 128) {
+ throw new IllegalArgumentException(String.format("根据字符串进行切分时仅支持 ASCII 字符串,而字符串:[%s]非 ASCII 字符串.", aString));
+ }
+ result = result.add(BigInteger.valueOf(tempChar).multiply(radixBigInteger.pow(k)));
+ k++;
+ }
+
+ return result;
+ }
+
+ /**
+ * Converts a BigInteger back to a String. Note: radix must lie in [1,128].
+ */
+ private static String bigIntegerToString(BigInteger bigInteger, int radix) {
+ if (null == bigInteger) {
+ throw new IllegalArgumentException("参数 bigInteger 不能为空.");
+ }
+
+ checkIfBetweenRange(radix, 1, 128);
+
+ StringBuilder resultStringBuilder = new StringBuilder();
+
+ List<Integer> list = new ArrayList<Integer>();
+ BigInteger radixBigInteger = BigInteger.valueOf(radix);
+ BigInteger currentValue = bigInteger;
+
+ BigInteger quotient = currentValue.divide(radixBigInteger);
+ while (quotient.compareTo(BigInteger.ZERO) > 0) {
+ list.add(currentValue.remainder(radixBigInteger).intValue());
+ currentValue = currentValue.divide(radixBigInteger);
+ quotient = currentValue;
+ }
+ Collections.reverse(list);
+
+ if (list.isEmpty()) {
+ list.add(0, bigInteger.remainder(radixBigInteger).intValue());
+ }
+
+ Map<Integer, Character> map = new HashMap<Integer, Character>();
+ for (int i = 0; i < radix; i++) {
+ map.put(i, (char) (i));
+ }
+
+// String msg = String.format("%s 转为 %s 进制,结果为:%s", bigInteger.longValue(), radix, list);
+// System.out.println(msg);
+
+ for (Integer aList : list) {
+ resultStringBuilder.append(map.get(aList));
+ }
+
+ return resultStringBuilder.toString();
+ }
+
+ /**
+ * Returns the smallest and the largest character of a string (by ASCII order).
+ * The string must be non-empty and ASCII-only.
+ * In the returned Pair, left = smallest character, right = largest character.
+ */
+ public static Pair<Character, Character> getMinAndMaxCharacter(String aString) {
+ if (!isPureAscii(aString)) {
+ throw new IllegalArgumentException(String.format("根据字符串进行切分时仅支持 ASCII 字符串,而字符串:[%s]非 ASCII 字符串.", aString));
+ }
+
+ char min = aString.charAt(0);
+ char max = min;
+
+ char temp;
+ for (int i = 1, len = aString.length(); i < len; i++) {
+ temp = aString.charAt(i);
+ min = min < temp ? min : temp;
+ max = max > temp ? max : temp;
+ }
+
+ return new ImmutablePair<Character, Character>(min, max);
+ }
+
+ private static boolean isPureAscii(String aString) {
+ if (null == aString) {
+ return false;
+ }
+
+ for (int i = 0, len = aString.length(); i < len; i++) {
+ char ch = aString.charAt(i);
+ if (ch >= 127 || ch < 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/util/RetryUtil.java b/common/src/main/java/com/alibaba/datax/common/util/RetryUtil.java
new file mode 100755
index 0000000000..33c712874b
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/RetryUtil.java
@@ -0,0 +1,208 @@
+package com.alibaba.datax.common.util;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.concurrent.*;
+
+public final class RetryUtil {
+
+ private static final Logger LOG = LoggerFactory.getLogger(RetryUtil.class);
+
+ private static final long MAX_SLEEP_MILLISECOND = 256 * 1000;
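+ // Usage sketch (fetchOnce() is a hypothetical flaky call): at most 3 attempts,
+ // sleeping 1s then 2s between them (exponential back-off, capped at 256s):
+ // String r = RetryUtil.executeWithRetry(new Callable<String>() {
+ // public String call() throws Exception { return fetchOnce(); }
+ // }, 3, 1000L, true);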
+
+ /**
+ * Retry helper.
+ *
+ * @param callable the actual work
+ * @param retryTimes maximum number of attempts (>= 1)
+ * @param sleepTimeInMilliSecond how long to sleep before the next attempt after a failure
+ * @param exponential whether the sleep time grows exponentially
+ * @param <T> result type
+ * @return the result of the (possibly retried) callable
+ */
+ public static <T> T executeWithRetry(Callable<T> callable,
+ int retryTimes,
+ long sleepTimeInMilliSecond,
+ boolean exponential) throws Exception {
+ Retry retry = new Retry();
+ return retry.doRetry(callable, retryTimes, sleepTimeInMilliSecond, exponential, null);
+ }
+
+ /**
+ * Retry helper that only retries on the listed exception types.
+ *
+ * @param callable the actual work
+ * @param retryTimes maximum number of attempts (>= 1)
+ * @param sleepTimeInMilliSecond how long to sleep before the next attempt after a failure
+ * @param exponential whether the sleep time grows exponentially
+ * @param <T> result type
+ * @param retryExceptionClasss retry only when one of these exception types is thrown
+ * @return the result of the (possibly retried) callable
+ */
+ public static <T> T executeWithRetry(Callable<T> callable,
+ int retryTimes,
+ long sleepTimeInMilliSecond,
+ boolean exponential,
+ List<Class<?>> retryExceptionClasss) throws Exception {
+ Retry retry = new Retry();
+ return retry.doRetry(callable, retryTimes, sleepTimeInMilliSecond, exponential, retryExceptionClasss);
+ }
+
+ /**
+ * Runs the callable on an external thread and retries. Each attempt must finish
+ * within timeoutMs, otherwise it counts as a failure.
+ * The executor is supplied by the caller, who also decides how widely the pool
+ * is shared (HttpClientUtil, for instance, shares a single pool).
+ *
+ * Limitation: the worker thread can only be interrupted while it is blocked.
+ *
+ * @param callable the actual work
+ * @param retryTimes maximum number of attempts (>= 1)
+ * @param sleepTimeInMilliSecond how long to sleep before the next attempt after a failure
+ * @param exponential whether the sleep time grows exponentially
+ * @param timeoutMs timeout of a single attempt, in milliseconds
+ * @param executor thread pool running the asynchronous attempts
+ * @param <T> result type
+ * @return the result of the (possibly retried) callable
+ */
+ public static <T> T asyncExecuteWithRetry(Callable<T> callable,
+ int retryTimes,
+ long sleepTimeInMilliSecond,
+ boolean exponential,
+ long timeoutMs,
+ ThreadPoolExecutor executor) throws Exception {
+ Retry retry = new AsyncRetry(timeoutMs, executor);
+ return retry.doRetry(callable, retryTimes, sleepTimeInMilliSecond, exponential, null);
+ }
+
+ /**
+ * Creates the thread pool for asynchronous attempts:
+ * core size 0, so an idle pool holds no threads and costs nothing up front;
+ * max size 5, so at most five threads run concurrently;
+ * 60s keep-alive, so threads idle for more than 60 seconds are reclaimed;
+ * a SynchronousQueue, so tasks never queue -- submission fails with
+ * RejectedExecutionException unless a thread is available.
+ *
+ * @return the thread pool
+ */
+ public static ThreadPoolExecutor createThreadPoolExecutor() {
+ return new ThreadPoolExecutor(0, 5,
+ 60L, TimeUnit.SECONDS,
+ new SynchronousQueue<Runnable>());
+ }
+
+
+ private static class Retry {
+
+ public <T> T doRetry(Callable<T> callable, int retryTimes, long sleepTimeInMilliSecond, boolean exponential, List<Class<?>> retryExceptionClasss)
+ throws Exception {
+
+ if (null == callable) {
+ throw new IllegalArgumentException("系统编程错误, 入参callable不能为空 ! ");
+ }
+
+ if (retryTimes < 1) {
+ throw new IllegalArgumentException(String.format(
+ "系统编程错误, 入参retrytime[%d]不能小于1 !", retryTimes));
+ }
+
+ Exception saveException = null;
+ for (int i = 0; i < retryTimes; i++) {
+ try {
+ return call(callable);
+ } catch (Exception e) {
+ saveException = e;
+ if (i == 0) {
+ LOG.error(String.format("Exception when calling callable, 异常Msg:%s", saveException.getMessage()), saveException);
+ }
+
+ if (null != retryExceptionClasss && !retryExceptionClasss.isEmpty()) {
+ boolean needRetry = false;
+ for (Class<?> eachExceptionClass : retryExceptionClasss) {
+ if (eachExceptionClass == e.getClass()) {
+ needRetry = true;
+ break;
+ }
+ }
+ if (!needRetry) {
+ throw saveException;
+ }
+ }
+
+ if (i + 1 < retryTimes && sleepTimeInMilliSecond > 0) {
+ long startTime = System.currentTimeMillis();
+
+ long timeToSleep;
+ if (exponential) {
+ timeToSleep = sleepTimeInMilliSecond * (long) Math.pow(2, i);
+ if(timeToSleep >= MAX_SLEEP_MILLISECOND) {
+ timeToSleep = MAX_SLEEP_MILLISECOND;
+ }
+ } else {
+ timeToSleep = sleepTimeInMilliSecond;
+ if(timeToSleep >= MAX_SLEEP_MILLISECOND) {
+ timeToSleep = MAX_SLEEP_MILLISECOND;
+ }
+ }
+
+ try {
+ Thread.sleep(timeToSleep);
+ } catch (InterruptedException ignored) {
+ }
+
+ long realTimeSleep = System.currentTimeMillis()-startTime;
+
+ LOG.error(String.format("Exception when calling callable, 即将尝试执行第%s次重试.本次重试计划等待[%s]ms,实际等待[%s]ms, 异常Msg:[%s]",
+ i+1, timeToSleep,realTimeSleep, e.getMessage()));
+
+ }
+ }
+ }
+ throw saveException;
+ }
+
+ protected <T> T call(Callable<T> callable) throws Exception {
+ return callable.call();
+ }
+ }
+
+ private static class AsyncRetry extends Retry {
+
+ private long timeoutMs;
+ private ThreadPoolExecutor executor;
+
+ public AsyncRetry(long timeoutMs, ThreadPoolExecutor executor) {
+ this.timeoutMs = timeoutMs;
+ this.executor = executor;
+ }
+
+ /**
+ * Runs the task on the supplied pool and waits for it.
+ *
+ * future.get() waits up to timeoutMs milliseconds; if the task finishes in time
+ * its result is returned. Any exception (timeout, execution failure, cancel or
+ * interrupt from another thread) is logged and rethrown.
+ * In both cases, if the task has not finished it is cancelled; cancel(true)
+ * interrupts the worker thread even if the task is still running.
+ *
+ * @param callable the actual work
+ * @param <T> result type
+ * @return the task result
+ * @throws Exception on timeout or failure of this attempt
+ */
+ @Override
+ protected <T> T call(Callable<T> callable) throws Exception {
+ Future<T> future = executor.submit(callable);
+ try {
+ return future.get(timeoutMs, TimeUnit.MILLISECONDS);
+ } catch (Exception e) {
+ LOG.warn("Try once failed", e);
+ throw e;
+ } finally {
+ if (!future.isDone()) {
+ future.cancel(true);
+ LOG.warn("Try once task not done, cancel it, active count: " + executor.getActiveCount());
+ }
+ }
+ }
+ }
+
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/util/StrUtil.java b/common/src/main/java/com/alibaba/datax/common/util/StrUtil.java
new file mode 100755
index 0000000000..82222b0d48
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/StrUtil.java
@@ -0,0 +1,85 @@
+package com.alibaba.datax.common.util;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.Validate;
+
+import java.text.DecimalFormat;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class StrUtil {
+
+ private final static long KB_IN_BYTES = 1024;
+
+ private final static long MB_IN_BYTES = 1024 * KB_IN_BYTES;
+
+ private final static long GB_IN_BYTES = 1024 * MB_IN_BYTES;
+
+ private final static long TB_IN_BYTES = 1024 * GB_IN_BYTES;
+
+ private final static DecimalFormat df = new DecimalFormat("0.00");
+
+ private static final Pattern VARIABLE_PATTERN = Pattern
+ .compile("(\\$)\\{?(\\w+)\\}?");
+
+ private static String SYSTEM_ENCODING = System.getProperty("file.encoding");
+
+ static {
+ if (SYSTEM_ENCODING == null) {
+ SYSTEM_ENCODING = "UTF-8";
+ }
+ }
+
+ private StrUtil() {
+ }
+
+ public static String stringify(long byteNumber) {
+ if (byteNumber / TB_IN_BYTES > 0) {
+ return df.format((double) byteNumber / (double) TB_IN_BYTES) + "TB";
+ } else if (byteNumber / GB_IN_BYTES > 0) {
+ return df.format((double) byteNumber / (double) GB_IN_BYTES) + "GB";
+ } else if (byteNumber / MB_IN_BYTES > 0) {
+ return df.format((double) byteNumber / (double) MB_IN_BYTES) + "MB";
+ } else if (byteNumber / KB_IN_BYTES > 0) {
+ return df.format((double) byteNumber / (double) KB_IN_BYTES) + "KB";
+ } else {
+ return String.valueOf(byteNumber) + "B";
+ }
+ }
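+ // e.g. stringify(1536) => "1.50KB", stringify(3 * 1024 * 1024) => "3.00MB"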
+
+
+ public static String replaceVariable(final String param) {
+ Map<String, String> mapping = new HashMap<String, String>();
+
+ Matcher matcher = VARIABLE_PATTERN.matcher(param);
+ while (matcher.find()) {
+ String variable = matcher.group(2);
+ String value = System.getProperty(variable);
+ if (StringUtils.isBlank(value)) {
+ value = matcher.group();
+ }
+ mapping.put(matcher.group(), value);
+ }
+
+ String retString = param;
+ for (final String key : mapping.keySet()) {
+ retString = retString.replace(key, mapping.get(key));
+ }
+
+ return retString;
+ }
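+ // e.g. with -DtableName=t1 set on the JVM, replaceVariable("select * from ${tableName}")
+ // => "select * from t1"; variables without a matching system property are left as-is.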
+
+ public static String compressMiddle(String s, int headLength, int tailLength) {
+ Validate.notNull(s, "Input string must not be null");
+ Validate.isTrue(headLength > 0, "Head length must be larger than 0");
+ Validate.isTrue(tailLength > 0, "Tail length must be larger than 0");
+
+ if(headLength + tailLength >= s.length()) {
+ return s;
+ }
+ return s.substring(0, headLength) + "..." + s.substring(s.length() - tailLength);
+ }
+
+}
diff --git a/core/pom.xml b/core/pom.xml
new file mode 100755
index 0000000000..5582d943d1
--- /dev/null
+++ b/core/pom.xml
@@ -0,0 +1,150 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>com.alibaba.datax</groupId>
+        <artifactId>datax-all</artifactId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>datax-core</artifactId>
+    <name>datax-core</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-transformer</artifactId>
+            <version>${datax-project-version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>slf4j-log4j12</artifactId>
+                    <groupId>org.slf4j</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>commons-configuration</groupId>
+            <artifactId>commons-configuration</artifactId>
+            <version>${commons-configuration-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+            <version>${commons-cli-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-beanutils</groupId>
+            <artifactId>commons-beanutils</artifactId>
+            <version>1.9.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>4.4</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>fluent-hc</artifactId>
+            <version>4.4</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.codehaus.janino</groupId>
+            <artifactId>janino</artifactId>
+            <version>2.5.16</version>
+        </dependency>
+
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+            <version>1.8.5</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-api-mockito</artifactId>
+            <version>1.4.10</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-module-junit4</artifactId>
+            <version>1.4.10</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.3.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.codehaus.groovy</groupId>
+            <artifactId>groovy-all</artifactId>
+            <version>2.1.9</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.alibaba.datax.core.Engine</mainClass>
+                        </manifest>
+                    </archive>
+                </configuration>
+            </plugin>
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.alibaba.datax.core.Engine</mainClass>
+                        </manifest>
+                    </archive>
+                    <finalName>datax</finalName>
+                    <descriptors>
+                        <descriptor>src/main/assembly/package.xml</descriptor>
+                    </descriptors>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>1.6</source>
+                    <target>1.6</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/core/src/main/assembly/package.xml b/core/src/main/assembly/package.xml
new file mode 100755
index 0000000000..7369f56351
--- /dev/null
+++ b/core/src/main/assembly/package.xml
@@ -0,0 +1,98 @@
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+    <formats>
+        <format>dir</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+
+    <fileSets>
+        <fileSet>
+            <directory>src/main/bin</directory>
+            <includes>
+                <include>*.*</include>
+            </includes>
+            <excludes>
+                <exclude>*.pyc</exclude>
+            </excludes>
+            <fileMode>775</fileMode>
+            <outputDirectory>/bin</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>src/main/script</directory>
+            <includes>
+                <include>*.*</include>
+            </includes>
+            <fileMode>775</fileMode>
+            <outputDirectory>/script</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>src/main/conf</directory>
+            <includes>
+                <include>*.*</include>
+            </includes>
+            <outputDirectory>/conf</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>target/</directory>
+            <includes>
+                <include>datax-core-0.0.1-SNAPSHOT.jar</include>
+            </includes>
+            <outputDirectory>/lib</outputDirectory>
+        </fileSet>
+
+        <fileSet>
+            <directory>src/main/job/</directory>
+            <includes>
+                <include>*.json</include>
+            </includes>
+            <outputDirectory>/job</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>src/main/tools/</directory>
+            <includes>
+                <include>*.*</include>
+            </includes>
+            <outputDirectory>/tools</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <fileMode>777</fileMode>
+            <directory>src/main/tmp</directory>
+            <includes>
+                <include>*.*</include>
+            </includes>
+            <outputDirectory>/tmp</outputDirectory>
+        </fileSet>
+    </fileSets>
+
+    <dependencySets>
+        <dependencySet>
+            <useProjectArtifact>false</useProjectArtifact>
+            <outputDirectory>/lib</outputDirectory>
+            <scope>runtime</scope>
+        </dependencySet>
+    </dependencySets>
+</assembly>
diff --git a/core/src/main/bin/datax.py b/core/src/main/bin/datax.py
new file mode 100755
index 0000000000..1099ed3a08
--- /dev/null
+++ b/core/src/main/bin/datax.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+
+import sys
+import os
+import signal
+import subprocess
+import time
+import re
+import socket
+import json
+from optparse import OptionParser
+from optparse import OptionGroup
+from string import Template
+import codecs
+import platform
+
+def isWindows():
+ return platform.system() == 'Windows'
+
+DATAX_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+DATAX_VERSION = 'DATAX-OPENSOURCE-3.0'
+if isWindows():
+ codecs.register(lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None)
+ CLASS_PATH = ("%s/lib/*") % (DATAX_HOME)
+else:
+ CLASS_PATH = ("%s/lib/*:.") % (DATAX_HOME)
+LOGBACK_FILE = ("%s/conf/logback.xml") % (DATAX_HOME)
+DEFAULT_JVM = "-Xms1g -Xmx1g -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=%s/log" % (DATAX_HOME)
+DEFAULT_PROPERTY_CONF = "-Dfile.encoding=UTF-8 -Dlogback.statusListenerClass=ch.qos.logback.core.status.NopStatusListener -Djava.security.egd=file:///dev/urandom -Ddatax.home=%s -Dlogback.configurationFile=%s" % (
+ DATAX_HOME, LOGBACK_FILE)
+ENGINE_COMMAND = "java -server ${jvm} %s -classpath %s ${params} com.alibaba.datax.core.Engine -mode ${mode} -jobid ${jobid} -job ${job}" % (
+ DEFAULT_PROPERTY_CONF, CLASS_PATH)
+REMOTE_DEBUG_CONFIG = "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=9999"
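+# A rendered start command looks roughly like this (hypothetical paths/values):
+#   java -server -Xms1g -Xmx1g ... -classpath {DATAX_HOME}/lib/*:. -Dlog.file.name=..._job_json \
+#       com.alibaba.datax.core.Engine -mode standalone -jobid -1 -job /path/to/job.json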
+
+RET_STATE = {
+ "KILL": 143,
+ "FAIL": -1,
+ "OK": 0,
+ "RUN": 1,
+ "RETRY": 2
+}
+
+
+def getLocalIp():
+ try:
+ return socket.gethostbyname(socket.getfqdn(socket.gethostname()))
+ except:
+ return "Unknown"
+
+
+def suicide(signum, e):
+ global child_process
+ print >> sys.stderr, "[Error] DataX receive unexpected signal %d, starts to suicide." % (signum)
+
+ if child_process:
+ child_process.send_signal(signal.SIGQUIT)
+ time.sleep(1)
+ child_process.kill()
+ print >> sys.stderr, "DataX Process was killed ! you did ?"
+ sys.exit(RET_STATE["KILL"])
+
+
+def register_signal():
+ if not isWindows():
+ global child_process
+ signal.signal(2, suicide)
+ signal.signal(3, suicide)
+ signal.signal(15, suicide)
+
+
+def getOptionParser():
+ usage = "usage: %prog [options] job-url-or-path"
+ parser = OptionParser(usage=usage)
+
+ prodEnvOptionGroup = OptionGroup(parser, "Product Env Options",
+ "Normal user use these options to set jvm parameters, job runtime mode etc. "
+ "Make sure these options can be used in Product Env.")
+ prodEnvOptionGroup.add_option("-j", "--jvm", metavar="", dest="jvmParameters", action="store",
+ default=DEFAULT_JVM, help="Set jvm parameters if necessary.")
+ prodEnvOptionGroup.add_option("--jobid", metavar="", dest="jobid", action="store", default="-1",
+ help="Set job unique id when running by Distribute/Local Mode.")
+ prodEnvOptionGroup.add_option("-m", "--mode", metavar="",
+ action="store", default="standalone",
+ help="Set job runtime mode such as: standalone, local, distribute. "
+ "Default mode is standalone.")
+ prodEnvOptionGroup.add_option("-p", "--params", metavar="",
+ action="store", dest="params",
+ help='Set job parameter, eg: the source tableName you want to set it by command, '
+ 'then you can use like this: -p"-DtableName=your-table-name", '
+ 'if you have multiple parameters: -p"-DtableName=your-table-name -DcolumnName=your-column-name".'
+ 'Note: you should reference tableName in your job config as ${tableName}.')
+ prodEnvOptionGroup.add_option("-r", "--reader", metavar="",
+ action="store", dest="reader",type="string",
+ help='View job config[reader] template, eg: mysqlreader,streamreader')
+ prodEnvOptionGroup.add_option("-w", "--writer", metavar="",
+ action="store", dest="writer",type="string",
+ help='View job config[writer] template, eg: mysqlwriter,streamwriter')
+ parser.add_option_group(prodEnvOptionGroup)
+
+ devEnvOptionGroup = OptionGroup(parser, "Develop/Debug Options",
+ "Developer use these options to trace more details of DataX.")
+ devEnvOptionGroup.add_option("-d", "--debug", dest="remoteDebug", action="store_true",
+ help="Set to remote debug mode.")
+ devEnvOptionGroup.add_option("--loglevel", metavar="", dest="loglevel", action="store",
+ default="info", help="Set log level such as: debug, info, all etc.")
+ parser.add_option_group(devEnvOptionGroup)
+ return parser
+
+def generateJobConfigTemplate(reader, writer):
+ readerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n" % (reader,reader,reader)
+ writerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n " % (writer,writer,writer)
+ print readerRef
+ print writerRef
+ jobGuid = 'Please save the following configuration as a json file and use\n python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json \nto run the job.\n'
+ print jobGuid
+ jobTemplate={
+ "job": {
+ "setting": {
+ "speed": {
+ "channel": ""
+ }
+ },
+ "content": [
+ {
+ "reader": {},
+ "writer": {}
+ }
+ ]
+ }
+ }
+ readerTemplatePath = "%s/plugin/reader/%s/plugin_job_template.json" % (DATAX_HOME,reader)
+ writerTemplatePath = "%s/plugin/writer/%s/plugin_job_template.json" % (DATAX_HOME,writer)
+ try:
+ readerPar = readPluginTemplate(readerTemplatePath);
+ except Exception, e:
+ print "Read reader[%s] template error: can\'t find file %s" % (reader,readerTemplatePath)
+ try:
+ writerPar = readPluginTemplate(writerTemplatePath);
+ except Exception, e:
+ print "Read writer[%s] template error: : can\'t find file %s" % (writer,writerTemplatePath)
+ jobTemplate['job']['content'][0]['reader'] = readerPar;
+ jobTemplate['job']['content'][0]['writer'] = writerPar;
+ print json.dumps(jobTemplate, indent=4, sort_keys=True)
+
+def readPluginTemplate(plugin):
+ with open(plugin, 'r') as f:
+ return json.load(f)
+
+def isUrl(path):
+ if not path:
+ return False
+
+ assert (isinstance(path, str))
+ m = re.match(r"^http[s]?://\S+\w*", path.lower())
+ if m:
+ return True
+ else:
+ return False
+
+
+def buildStartCommand(options, args):
+ commandMap = {}
+ tempJVMCommand = DEFAULT_JVM
+ if options.jvmParameters:
+ tempJVMCommand = tempJVMCommand + " " + options.jvmParameters
+
+ if options.remoteDebug:
+ tempJVMCommand = tempJVMCommand + " " + REMOTE_DEBUG_CONFIG
+ print 'local ip: ', getLocalIp()
+
+ if options.loglevel:
+ tempJVMCommand = tempJVMCommand + " " + ("-Dloglevel=%s" % (options.loglevel))
+
+ if options.mode:
+ commandMap["mode"] = options.mode
+
+ # jobResource may be a URL, or a local file path (relative or absolute)
+ jobResource = args[0]
+ if not isUrl(jobResource):
+ jobResource = os.path.abspath(jobResource)
+ if jobResource.lower().startswith("file://"):
+ jobResource = jobResource[len("file://"):]
+
+ jobParams = ("-Dlog.file.name=%s") % (jobResource[-20:].replace('/', '_').replace('.', '_'))
+ if options.params:
+ jobParams = jobParams + " " + options.params
+
+ if options.jobid:
+ commandMap["jobid"] = options.jobid
+
+ commandMap["jvm"] = tempJVMCommand
+ commandMap["params"] = jobParams
+ commandMap["job"] = jobResource
+
+ return Template(ENGINE_COMMAND).substitute(**commandMap)
+
+
+def printCopyright():
+ print '''
+DataX (%s), From Alibaba !
+Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
+
+''' % DATAX_VERSION
+ sys.stdout.flush()
+
+
+if __name__ == "__main__":
+ printCopyright()
+ parser = getOptionParser()
+ options, args = parser.parse_args(sys.argv[1:])
+ if options.reader is not None and options.writer is not None:
+ generateJobConfigTemplate(options.reader,options.writer)
+ sys.exit(RET_STATE['OK'])
+ if len(args) != 1:
+ parser.print_help()
+ sys.exit(RET_STATE['FAIL'])
+
+ startCommand = buildStartCommand(options, args)
+ # print startCommand
+
+ child_process = subprocess.Popen(startCommand, shell=True)
+ register_signal()
+ (stdout, stderr) = child_process.communicate()
+
+ sys.exit(child_process.returncode)
diff --git a/core/src/main/bin/dxprof.py b/core/src/main/bin/dxprof.py
new file mode 100644
index 0000000000..181bf90085
--- /dev/null
+++ b/core/src/main/bin/dxprof.py
@@ -0,0 +1,191 @@
+#! /usr/bin/env python
+# vim: set expandtab tabstop=4 shiftwidth=4 foldmethod=marker nu:
+
+import re
+import sys
+import time
+
+REG_SQL_WAKE = re.compile(r'Begin\s+to\s+read\s+record\s+by\s+Sql', re.IGNORECASE)
+REG_SQL_DONE = re.compile(r'Finished\s+read\s+record\s+by\s+Sql', re.IGNORECASE)
+REG_SQL_PATH = re.compile(r'from\s+(\w+)(\s+where|\s*$)', re.IGNORECASE)
+REG_SQL_JDBC = re.compile(r'jdbcUrl:\s*\[(.+?)\]', re.IGNORECASE)
+REG_SQL_UUID = re.compile(r'(\d+\-)+reader')
+REG_COMMIT_UUID = re.compile(r'(\d+\-)+writer')
+REG_COMMIT_WAKE = re.compile(r'begin\s+to\s+commit\s+blocks', re.IGNORECASE)
+REG_COMMIT_DONE = re.compile(r'commit\s+blocks\s+ok', re.IGNORECASE)
+
+# {{{ function parse_timestamp() #
+def parse_timestamp(line):
+ try:
+ ts = int(time.mktime(time.strptime(line[0:19], '%Y-%m-%d %H:%M:%S')))
+ except:
+ ts = 0
+
+ return ts
+
+# }}} #
+
+# {{{ function parse_query_host() #
+def parse_query_host(line):
+ ori = REG_SQL_JDBC.search(line)
+ if (not ori):
+ return ''
+
+ ori = ori.group(1).split('?')[0]
+ off = ori.find('@')
+ if (off > -1):
+ ori = ori[off+1:len(ori)]
+ else:
+ off = ori.find('//')
+ if (off > -1):
+ ori = ori[off+2:len(ori)]
+
+ return ori.lower()
+# }}} #
+
+# {{{ function parse_query_table() #
+def parse_query_table(line):
+ ori = REG_SQL_PATH.search(line)
+ return (ori and ori.group(1).lower()) or ''
+# }}} #
+
+# {{{ function parse_task() #
+def parse_task(fname):
+ global LAST_SQL_UUID
+ global LAST_COMMIT_UUID
+ global DATAX_JOBDICT
+ global DATAX_JOBDICT_COMMIT
+ global UNIXTIME
+ LAST_SQL_UUID = ''
+ DATAX_JOBDICT = {}
+ LAST_COMMIT_UUID = ''
+ DATAX_JOBDICT_COMMIT = {}
+
+ UNIXTIME = int(time.time())
+ with open(fname, 'r') as f:
+ for line in f.readlines():
+ line = line.strip()
+
+ if (LAST_SQL_UUID and (LAST_SQL_UUID in DATAX_JOBDICT)):
+ DATAX_JOBDICT[LAST_SQL_UUID]['host'] = parse_query_host(line)
+ LAST_SQL_UUID = ''
+
+ if line.find('CommonRdbmsReader$Task') > 0:
+ parse_read_task(line)
+ elif line.find('commit blocks') > 0:
+ parse_write_task(line)
+ else:
+ continue
+# }}} #
+
+# {{{ function parse_read_task() #
+def parse_read_task(line):
+ # declared global: parse_task() reads LAST_SQL_UUID to attach the jdbc host
+ # that is logged on the following line
+ global LAST_SQL_UUID
+ ser = REG_SQL_UUID.search(line)
+ if not ser:
+ return
+
+ LAST_SQL_UUID = ser.group()
+ if REG_SQL_WAKE.search(line):
+ DATAX_JOBDICT[LAST_SQL_UUID] = {
+ 'stat' : 'R',
+ 'wake' : parse_timestamp(line),
+ 'done' : UNIXTIME,
+ 'host' : parse_query_host(line),
+ 'path' : parse_query_table(line)
+ }
+ elif ((LAST_SQL_UUID in DATAX_JOBDICT) and REG_SQL_DONE.search(line)):
+ DATAX_JOBDICT[LAST_SQL_UUID]['stat'] = 'D'
+ DATAX_JOBDICT[LAST_SQL_UUID]['done'] = parse_timestamp(line)
+# }}} #
+
+# {{{ function parse_write_task() #
+def parse_write_task(line):
+ # declared global so the module-level LAST_COMMIT_UUID is actually updated
+ global LAST_COMMIT_UUID
+ ser = REG_COMMIT_UUID.search(line)
+ if not ser:
+ return
+
+ LAST_COMMIT_UUID = ser.group()
+ if REG_COMMIT_WAKE.search(line):
+ DATAX_JOBDICT_COMMIT[LAST_COMMIT_UUID] = {
+ 'stat' : 'R',
+ 'wake' : parse_timestamp(line),
+ 'done' : UNIXTIME,
+ }
+ elif ((LAST_COMMIT_UUID in DATAX_JOBDICT_COMMIT) and REG_COMMIT_DONE.search(line)):
+ DATAX_JOBDICT_COMMIT[LAST_COMMIT_UUID]['stat'] = 'D'
+ DATAX_JOBDICT_COMMIT[LAST_COMMIT_UUID]['done'] = parse_timestamp(line)
+# }}} #
+
+# {{{ function result_analyse() #
+def result_analyse():
+ def compare(a, b):
+ return b['cost'] - a['cost']
+
+ tasklist = []
+ hostsmap = {}
+ statvars = {'sum' : 0, 'cnt' : 0, 'svr' : 0, 'max' : 0, 'min' : int(time.time())}
+ tasklist_commit = []
+ statvars_commit = {'sum' : 0, 'cnt' : 0}
+
+ for idx in DATAX_JOBDICT:
+ item = DATAX_JOBDICT[idx]
+ item['uuid'] = idx;
+ item['cost'] = item['done'] - item['wake']
+ tasklist.append(item);
+
+ if (not (item['host'] in hostsmap)):
+ hostsmap[item['host']] = 1
+ statvars['svr'] += 1
+
+ if (item['cost'] > -1 and item['cost'] < 864000):
+ statvars['sum'] += item['cost']
+ statvars['cnt'] += 1
+ statvars['max'] = max(statvars['max'], item['done'])
+ statvars['min'] = min(statvars['min'], item['wake'])
+
+ for idx in DATAX_JOBDICT_COMMIT:
+ itemc = DATAX_JOBDICT_COMMIT[idx]
+ itemc['uuid'] = idx
+ itemc['cost'] = itemc['done'] - itemc['wake']
+ tasklist_commit.append(itemc)
+
+ if (itemc['cost'] > -1 and itemc['cost'] < 864000):
+ statvars_commit['sum'] += itemc['cost']
+ statvars_commit['cnt'] += 1
+
+ ttl = (statvars['max'] - statvars['min']) or 1
+ idx = float(statvars['cnt']) / (statvars['sum'] or ttl)
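+ # idx approximates "tasks completed per busy second"; idx * cost (the last
+ # printed column, and the reported tilt index) estimates how many average
+ # tasks a given task displaced.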
+
+ tasklist.sort(compare)
+ for item in tasklist:
+ print '%s\t%s.%s\t%s\t%s\t% 4d\t% 2.1f%%\t% .2f' %(item['stat'], item['host'], item['path'],
+ time.strftime('%H:%M:%S', time.localtime(item['wake'])),
+ (('D' == item['stat']) and time.strftime('%H:%M:%S', time.localtime(item['done']))) or '--',
+ item['cost'], 100 * item['cost'] / ttl, idx * item['cost'])
+
+ if (not len(tasklist) or not statvars['cnt']):
+ return
+
+ print '\n--- DataX Profiling Statistics ---'
+ print '%d task(s) on %d server(s), Total elapsed %d second(s), %.2f second(s) per task in average' %(statvars['cnt'],
+ statvars['svr'], statvars['sum'], float(statvars['sum']) / statvars['cnt'])
+ print 'Actually cost %d second(s) (%s - %s), task concurrency: %.2f, tilt index: %.2f' %(ttl,
+ time.strftime('%H:%M:%S', time.localtime(statvars['min'])),
+ time.strftime('%H:%M:%S', time.localtime(statvars['max'])),
+ float(statvars['sum']) / ttl, idx * tasklist[0]['cost'])
+
+ idx_commit = float(statvars_commit['cnt']) / (statvars_commit['sum'] or ttl)
+ tasklist_commit.sort(compare)
+ print '%d task(s) done odps commit, Total elapsed %d second(s), %.2f second(s) per task in average, tilt index: %.2f' % (
+ statvars_commit['cnt'],
+ statvars_commit['sum'], float(statvars_commit['sum']) / statvars_commit['cnt'],
+ idx_commit * tasklist_commit[0]['cost'])
+
+# }}} #
+
+if (len(sys.argv) < 2):
+ print "Usage: %s filename" %(sys.argv[0])
+ quit(1)
+else:
+ parse_task(sys.argv[1])
+ result_analyse()
\ No newline at end of file
diff --git a/core/src/main/bin/perftrace.py b/core/src/main/bin/perftrace.py
new file mode 100755
index 0000000000..41a1ecb305
--- /dev/null
+++ b/core/src/main/bin/perftrace.py
@@ -0,0 +1,400 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+
+
+"""
+ Life's short, Python more.
+"""
+
+import re
+import os
+import sys
+import json
+import uuid
+import signal
+import time
+import subprocess
+from optparse import OptionParser
+reload(sys)
+sys.setdefaultencoding('utf8')
+
+##begin cli & help logic
+def getOptionParser():
+ usage = getUsage()
+ parser = OptionParser(usage = usage)
+ #rdbms reader and writer
+ parser.add_option('-r', '--reader', action='store', dest='reader', help='trace datasource read performance with specified !json! string')
+ parser.add_option('-w', '--writer', action='store', dest='writer', help='trace datasource write performance with specified !json! string')
+
+ parser.add_option('-c', '--channel', action='store', dest='channel', default='1', help='the number of concurrent sync thread, the default is 1')
+ parser.add_option('-f', '--file', action='store', help='existing datax configuration file, include reader and writer params')
+ parser.add_option('-t', '--type', action='store', default='reader', help='trace which side\'s performance, cooperate with -f --file params, need to be reader or writer')
+ parser.add_option('-d', '--delete', action='store', default='true', help='delete temporary files, the default value is true')
+ #parser.add_option('-h', '--help', action='store', default='true', help='print usage information')
+ return parser
+
+def getUsage():
+ return '''
+The following params are available for -r --reader:
+ [these params are for the rdbms reader, used to trace rdbms read performance; they mirror datax's keys]
+ *datasourceType: datasource type, may be mysql|drds|oracle|ads|sqlserver|postgresql|db2 etc...
+ *jdbcUrl: datasource jdbc connection string, mysql as a example: jdbc:mysql://ip:port/database
+ *username: username for datasource
+ *password: password for datasource
+ *table: table name for read data
+ column: column to be read, the default value is ['*']
+ splitPk: the splitPk column of rdbms table
+ where: limit the scope of the performance data set
+ fetchSize: how many rows to be fetched at each communicate
+
+ [these params are for the stream reader, used to trace rdbms write performance]
+ reader-sliceRecordCount: how many records to mock (per channel), the default value is 10000
+ reader-column : columns used by the stream reader to generate test data (types: string|long|date|double|bool|bytes; constant values and random functions are supported), demo: [{"type":"string","value":"abc"},{"type":"string","random":"10,20"}]
+
+The following params are available for -w --writer:
+ [these params are for the rdbms writer, used to trace rdbms write performance; they mirror datax's keys]
+ datasourceType: datasource type, may be mysql|drds|oracle|ads|sqlserver|postgresql|db2|ads etc...
+ *jdbcUrl: datasource jdbc connection string, mysql as a example: jdbc:mysql://ip:port/database
+ *username: username for datasource
+ *password: password for datasource
+ *table: table name for write data
+ column: column to be written, the default value is ['*']
+ batchSize: how many rows to be stored at each communicate, the default value is 512
+ preSql: prepare sql to be executed before write data, the default value is ''
+ postSql: post sql to be executed end of write data, the default value is ''
+ url: required for ads, pattern is ip:port
+ schema: required for ads, ads database name
+
+ [these params are for the stream writer, used to trace rdbms read performance]
+ writer-print: true means print data read from source datasource, the default value is false
+
+The following params are available global control:
+ -c --channel: the number of concurrent tasks, the default value is 1
+ -f --file: path to an existing complete DataX configuration file
+ -t --type: test read or write performance for a datasource, could be reader or writer, in collaboration with -f --file
+ -h --help: print help message
+
+some demo:
+perftrace.py --channel=10 --reader='{"jdbcUrl":"jdbc:mysql://127.0.0.1:3306/database", "username":"", "password":"", "table": "", "where":"", "splitPk":"", "writer-print":"false"}'
+perftrace.py --channel=10 --writer='{"jdbcUrl":"jdbc:mysql://127.0.0.1:3306/database", "username":"", "password":"", "table": "", "reader-sliceRecordCount": "10000", "reader-column": [{"type":"string","value":"abc"},{"type":"string","random":"10,20"}]}'
+perftrace.py --file=/tmp/datax.job.json --type=reader --reader='{"writer-print": "false"}'
+perftrace.py --file=/tmp/datax.job.json --type=writer --writer='{"reader-sliceRecordCount": "10000", "reader-column": [{"type":"string","value":"abc"},{"type":"string","random":"10,20"}]}'
+
+some example jdbc url pattern, may help:
+jdbc:oracle:thin:@ip:port:database
+jdbc:mysql://ip:port/database
+jdbc:sqlserver://ip:port;DatabaseName=database
+jdbc:postgresql://ip:port/database
+warn: ads url pattern is ip:port
+warn: test write performance will write data into your table, you can use a temporary table just for test.
+'''
+
+def printCopyright():
+ DATAX_VERSION = 'UNKNOWN_DATAX_VERSION'
+ print '''
+DataX Util Tools (%s), From Alibaba !
+Copyright (C) 2010-2016, Alibaba Group. All Rights Reserved.''' % DATAX_VERSION
+ sys.stdout.flush()
+
+
+def yesNoChoice():
+ yes = set(['yes','y', 'ye', ''])
+ no = set(['no','n'])
+ choice = raw_input().lower()
+ if choice in yes:
+ return True
+ elif choice in no:
+ return False
+ else:
+ sys.stdout.write("Please respond with 'yes' or 'no'")
+##end cli & help logic
+
+
+##begin process logic
+def suicide(signum, e):
+ global childProcess
+ print >> sys.stderr, "[Error] Receive unexpected signal %d, starts to suicide." % (signum)
+ if childProcess:
+ childProcess.send_signal(signal.SIGQUIT)
+ time.sleep(1)
+ childProcess.kill()
+ print >> sys.stderr, "DataX Process was killed ! you did ?"
+ sys.exit(-1)
+
+
+def registerSignal():
+ global childProcess
+ signal.signal(2, suicide)
+ signal.signal(3, suicide)
+ signal.signal(15, suicide)
+
+
+def fork(command, isShell=False):
+ global childProcess
+ childProcess = subprocess.Popen(command, shell = isShell)
+ registerSignal()
+ (stdout, stderr) = childProcess.communicate()
+ # block until the child process exits (communicate() above has already waited,
+ # so this wait() is only a harmless safeguard)
+ childProcess.wait()
+ return childProcess.returncode
+##end process logic
+
+
+##begin datax json generate logic
+#warn: if not '': -> true; if not None: -> true
+def notNone(obj, context):
+ if not obj:
+ raise Exception("Configuration property [%s] could not be blank!" % (context))
+
+def attributeNotNone(obj, attributes):
+ for key in attributes:
+ notNone(obj.get(key), key)
+
+def isBlank(value):
+ if value is None or len(value.strip()) == 0:
+ return True
+ return False
+
+def parsePluginName(jdbcUrl, pluginType):
+ import re
+ #warn: drds
+ name = 'pluginName'
+ mysqlRegex = re.compile('jdbc:(mysql)://.*')
+ if (mysqlRegex.match(jdbcUrl)):
+ name = 'mysql'
+ postgresqlRegex = re.compile('jdbc:(postgresql)://.*')
+ if (postgresqlRegex.match(jdbcUrl)):
+ name = 'postgresql'
+ oracleRegex = re.compile('jdbc:(oracle):.*')
+ if (oracleRegex.match(jdbcUrl)):
+ name = 'oracle'
+ sqlserverRegex = re.compile('jdbc:(sqlserver)://.*')
+ if (sqlserverRegex.match(jdbcUrl)):
+ name = 'sqlserver'
+ db2Regex = re.compile('jdbc:(db2)://.*')
+ if (db2Regex.match(jdbcUrl)):
+ name = 'db2'
+ return "%s%s" % (name, pluginType)
+
+def renderDataXJson(paramsDict, readerOrWriter = 'reader', channel = 1):
+ dataxTemplate = {
+ "job": {
+ "setting": {
+ "speed": {
+ "channel": 1
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "",
+ "parameter": {
+ "username": "",
+ "password": "",
+ "sliceRecordCount": "10000",
+ "column": [
+ "*"
+ ],
+ "connection": [
+ {
+ "table": [],
+ "jdbcUrl": []
+ }
+ ]
+ }
+ },
+ "writer": {
+ "name": "",
+ "parameter": {
+ "print": "false",
+ "connection": [
+ {
+ "table": [],
+ "jdbcUrl": ''
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ }
+ dataxTemplate['job']['setting']['speed']['channel'] = channel
+ dataxTemplateContent = dataxTemplate['job']['content'][0]
+
+ pluginName = ''
+ if paramsDict.get('datasourceType'):
+ pluginName = '%s%s' % (paramsDict['datasourceType'], readerOrWriter)
+ elif paramsDict.get('jdbcUrl'):
+ pluginName = parsePluginName(paramsDict['jdbcUrl'], readerOrWriter)
+ elif paramsDict.get('url'):
+ pluginName = 'adswriter'
+
+ theOtherSide = 'writer' if readerOrWriter == 'reader' else 'reader'
+ dataxPluginParamsContent = dataxTemplateContent.get(readerOrWriter).get('parameter')
+ dataxPluginParamsContent.update(paramsDict)
+
+ dataxPluginParamsContentOtherSide = dataxTemplateContent.get(theOtherSide).get('parameter')
+
+ if readerOrWriter == 'reader':
+ dataxTemplateContent.get('reader')['name'] = pluginName
+ dataxTemplateContent.get('writer')['name'] = 'streamwriter'
+ if paramsDict.get('writer-print'):
+ dataxPluginParamsContentOtherSide['print'] = paramsDict['writer-print']
+ del dataxPluginParamsContent['writer-print']
+ del dataxPluginParamsContentOtherSide['connection']
+ if readerOrWriter == 'writer':
+ dataxTemplateContent.get('reader')['name'] = 'streamreader'
+ dataxTemplateContent.get('writer')['name'] = pluginName
+ if paramsDict.get('reader-column'):
+ dataxPluginParamsContentOtherSide['column'] = paramsDict['reader-column']
+ del dataxPluginParamsContent['reader-column']
+ if paramsDict.get('reader-sliceRecordCount'):
+ dataxPluginParamsContentOtherSide['sliceRecordCount'] = paramsDict['reader-sliceRecordCount']
+ del dataxPluginParamsContent['reader-sliceRecordCount']
+ del dataxPluginParamsContentOtherSide['connection']
+
+ if paramsDict.get('jdbcUrl'):
+ if readerOrWriter == 'reader':
+ dataxPluginParamsContent['connection'][0]['jdbcUrl'].append(paramsDict['jdbcUrl'])
+ else:
+ dataxPluginParamsContent['connection'][0]['jdbcUrl'] = paramsDict['jdbcUrl']
+ if paramsDict.get('table'):
+ dataxPluginParamsContent['connection'][0]['table'].append(paramsDict['table'])
+
+
+ traceJobJson = json.dumps(dataxTemplate, indent = 4)
+ return traceJobJson
+
+def isUrl(path):
+ if not path:
+ return False
+ if not isinstance(path, str):
+ raise Exception('Configuration file path must be a string; you configured: %s' % path)
+ m = re.match(r"^http[s]?://\S+\w*", path.lower())
+ if m:
+ return True
+ else:
+ return False
+
+
+def readJobJsonFromLocal(jobConfigPath):
+ jobConfigContent = None
+ jobConfigPath = os.path.abspath(jobConfigPath)
+ file = open(jobConfigPath)
+ try:
+ jobConfigContent = file.read()
+ finally:
+ file.close()
+ if not jobConfigContent:
+ raise Exception("Your job configuration file read the result is empty, please check the configuration is legal, path: [%s]\nconfiguration:\n%s" % (jobConfigPath, str(jobConfigContent)))
+ return jobConfigContent
+
+
+def readJobJsonFromRemote(jobConfigPath):
+ import urllib
+ conn = urllib.urlopen(jobConfigPath)
+ jobJson = conn.read()
+ return jobJson
+
+def parseJson(strConfig, context):
+ try:
+ return json.loads(strConfig)
+ except Exception, e:
+ import traceback
+ traceback.print_exc()
+ sys.stdout.flush()
+ print >> sys.stderr, '%s %s need in line with json syntax' % (context, strConfig)
+ sys.exit(-1)
+
+def convert(options, args):
+ traceJobJson = ''
+ if options.file:
+ if isUrl(options.file):
+ traceJobJson = readJobJsonFromRemote(options.file)
+ else:
+ traceJobJson = readJobJsonFromLocal(options.file)
+ traceJobDict = parseJson(traceJobJson, '%s content' % options.file)
+ attributeNotNone(traceJobDict, ['job'])
+ attributeNotNone(traceJobDict['job'], ['content'])
+ attributeNotNone(traceJobDict['job']['content'][0], ['reader', 'writer'])
+ attributeNotNone(traceJobDict['job']['content'][0]['reader'], ['name', 'parameter'])
+ attributeNotNone(traceJobDict['job']['content'][0]['writer'], ['name', 'parameter'])
+ if options.type == 'reader':
+ traceJobDict['job']['content'][0]['writer']['name'] = 'streamwriter'
+ if options.reader:
+ traceReaderDict = parseJson(options.reader, 'reader config')
+ if traceReaderDict.get('writer-print') is not None:
+ traceJobDict['job']['content'][0]['writer']['parameter']['print'] = traceReaderDict.get('writer-print')
+ else:
+ traceJobDict['job']['content'][0]['writer']['parameter']['print'] = 'false'
+ else:
+ traceJobDict['job']['content'][0]['writer']['parameter']['print'] = 'false'
+ elif options.type == 'writer':
+ traceJobDict['job']['content'][0]['reader']['name'] = 'streamreader'
+ if options.writer:
+ traceWriterDict = parseJson(options.writer, 'writer config')
+ if traceWriterDict.get('reader-column'):
+ traceJobDict['job']['content'][0]['reader']['parameter']['column'] = traceWriterDict['reader-column']
+ if traceWriterDict.get('reader-sliceRecordCount'):
+ traceJobDict['job']['content'][0]['reader']['parameter']['sliceRecordCount'] = traceWriterDict['reader-sliceRecordCount']
+ else:
+ columnSize = len(traceJobDict['job']['content'][0]['writer']['parameter']['column'])
+ streamReaderColumn = []
+ for i in range(columnSize):
+ streamReaderColumn.append({"type": "long", "random": "2,10"})
+ traceJobDict['job']['content'][0]['reader']['parameter']['column'] = streamReaderColumn
+ traceJobDict['job']['content'][0]['reader']['parameter']['sliceRecordCount'] = 10000
+ else:
+ pass#do nothing
+ return json.dumps(traceJobDict, indent = 4)
+ elif options.reader:
+ traceReaderDict = parseJson(options.reader, 'reader config')
+ return renderDataXJson(traceReaderDict, 'reader', options.channel)
+ elif options.writer:
+ traceWriterDict = parseJson(options.writer, 'writer config')
+ return renderDataXJson(traceWriterDict, 'writer', options.channel)
+ else:
+ print getUsage()
+ sys.exit(-1)
+ #dataxParams = {}
+ #for opt, value in options.__dict__.items():
+ # dataxParams[opt] = value
+##end datax json generate logic
+
+
+if __name__ == "__main__":
+ printCopyright()
+ parser = getOptionParser()
+
+ options, args = parser.parse_args(sys.argv[1:])
+ #print options, args
+ dataxTraceJobJson = convert(options, args)
+
+ # uuid1 combines the MAC address, the current timestamp and a random number, so the name is globally unique
+ dataxJobPath = os.path.join(os.getcwd(), "perftrace-" + str(uuid.uuid1()))
+ jobConfigOk = True
+ if os.path.exists(dataxJobPath):
+ print "file already exists, truncate and rewrite it? %s" % dataxJobPath
+ if yesNoChoice():
+ jobConfigOk = True
+ else:
+ print "exit failed, because of file conflict"
+ sys.exit(-1)
+ fileWriter = open(dataxJobPath, 'w')
+ fileWriter.write(dataxTraceJobJson)
+ fileWriter.close()
+
+
+ print "trace environments:"
+ print "dataxJobPath: %s" % dataxJobPath
+ dataxHomePath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ print "dataxHomePath: %s" % dataxHomePath
+
+ dataxCommand = "%s %s" % (os.path.join(dataxHomePath, "bin", "datax.py"), dataxJobPath)
+ print "dataxCommand: %s" % dataxCommand
+
+ returncode = fork(dataxCommand, True)
+ if options.delete == 'true':
+ os.remove(dataxJobPath)
+ sys.exit(returncode)
diff --git a/core/src/main/conf/.secret.properties b/core/src/main/conf/.secret.properties
new file mode 100755
index 0000000000..b807f8ad63
--- /dev/null
+++ b/core/src/main/conf/.secret.properties
@@ -0,0 +1,9 @@
+#ds basicAuth config
+auth.user=
+auth.pass=
+current.keyVersion=
+current.publicKey=
+current.privateKey=
+current.service.username=
+current.service.password=
+
diff --git a/core/src/main/conf/core.json b/core/src/main/conf/core.json
new file mode 100755
index 0000000000..5aa855bc81
--- /dev/null
+++ b/core/src/main/conf/core.json
@@ -0,0 +1,61 @@
+
+{
+ "entry": {
+ "jvm": "-Xms1G -Xmx1G",
+ "environment": {}
+ },
+ "common": {
+ "column": {
+ "datetimeFormat": "yyyy-MM-dd HH:mm:ss",
+ "timeFormat": "HH:mm:ss",
+ "dateFormat": "yyyy-MM-dd",
+ "extraFormats":["yyyyMMdd"],
+ "timeZone": "GMT+8",
+ "encoding": "utf-8"
+ }
+ },
+ "core": {
+ "dataXServer": {
+ "address": "http://localhost:7001/api",
+ "timeout": 10000,
+ "reportDataxLog": false,
+ "reportPerfLog": false
+ },
+ "transport": {
+ "channel": {
+ "class": "com.alibaba.datax.core.transport.channel.memory.MemoryChannel",
+ "speed": {
+ "byte": -1,
+ "record": -1
+ },
+ "flowControlInterval": 20,
+ "capacity": 512,
+ "byteCapacity": 67108864
+ },
+ "exchanger": {
+ "class": "com.alibaba.datax.core.plugin.BufferedRecordExchanger",
+ "bufferSize": 32
+ }
+ },
+ "container": {
+ "job": {
+ "reportInterval": 10000
+ },
+ "taskGroup": {
+ "channel": 5
+ },
+ "trace": {
+ "enable": "false"
+ }
+
+ },
+ "statistics": {
+ "collector": {
+ "plugin": {
+ "taskClass": "com.alibaba.datax.core.statistics.plugin.task.StdoutPluginCollector",
+ "maxDirtyNumber": 10
+ }
+ }
+ }
+ }
+}
diff --git a/core/src/main/conf/logback.xml b/core/src/main/conf/logback.xml
new file mode 100755
index 0000000000..15e4880336
--- /dev/null
+++ b/core/src/main/conf/logback.xml
@@ -0,0 +1,150 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <charset>UTF-8</charset>
+            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{0} - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
+        <encoder>
+            <charset>UTF-8</charset>
+            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{0} - %msg%n</pattern>
+        </encoder>
+        <file>${log.dir}/${ymd}/${log.file.name}-${byMillionSecond}.log</file>
+        <append>false</append>
+    </appender>
+
+    <appender name="PERF" class="ch.qos.logback.core.FileAppender">
+        <encoder>
+            <charset>UTF-8</charset>
+            <pattern>%msg%n</pattern>
+        </encoder>
+        <file>${perf.dir}/${ymd}/${log.file.name}-${byMillionSecond}.log</file>
+        <append>false</append>
+    </appender>
+
+    <!-- logger and root definitions were stripped from this copy of the patch and are not
+         recoverable; appender names above are inferred, patterns and paths are original -->
+</configuration>
\ No newline at end of file
diff --git a/core/src/main/java/com/alibaba/datax/core/AbstractContainer.java b/core/src/main/java/com/alibaba/datax/core/AbstractContainer.java
new file mode 100755
index 0000000000..c4e09b757e
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/AbstractContainer.java
@@ -0,0 +1,35 @@
+package com.alibaba.datax.core;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.statistics.container.communicator.AbstractContainerCommunicator;
+import org.apache.commons.lang.Validate;
+
+/**
+ * Abstract base class for execution containers; holds the container-wide configuration.
+ */
+public abstract class AbstractContainer {
+ protected Configuration configuration;
+
+ protected AbstractContainerCommunicator containerCommunicator;
+
+ public AbstractContainer(Configuration configuration) {
+ Validate.notNull(configuration, "Configuration can not be null.");
+
+ this.configuration = configuration;
+ }
+
+ public Configuration getConfiguration() {
+ return configuration;
+ }
+
+ public AbstractContainerCommunicator getContainerCommunicator() {
+ return containerCommunicator;
+ }
+
+ public void setContainerCommunicator(AbstractContainerCommunicator containerCommunicator) {
+ this.containerCommunicator = containerCommunicator;
+ }
+
+ public abstract void start();
+
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/Engine.java b/core/src/main/java/com/alibaba/datax/core/Engine.java
new file mode 100755
index 0000000000..f80d792f3c
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/Engine.java
@@ -0,0 +1,223 @@
+package com.alibaba.datax.core;
+
+import com.alibaba.datax.common.element.ColumnCast;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.spi.ErrorCode;
+import com.alibaba.datax.common.statistics.PerfTrace;
+import com.alibaba.datax.common.statistics.VMInfo;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.job.JobContainer;
+import com.alibaba.datax.core.taskgroup.TaskGroupContainer;
+import com.alibaba.datax.core.util.ConfigParser;
+import com.alibaba.datax.core.util.ConfigurationValidate;
+import com.alibaba.datax.core.util.ExceptionTracker;
+import com.alibaba.datax.core.util.FrameworkErrorCode;
+import com.alibaba.datax.core.util.container.CoreConstant;
+import com.alibaba.datax.core.util.container.LoadUtil;
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Engine is the DataX entry point. It initializes the running container for the Job or Task and executes the plugin's Job or Task logic.
+ */
+public class Engine {
+ private static final Logger LOG = LoggerFactory.getLogger(Engine.class);
+
+ private static String RUNTIME_MODE;
+
+ /* check job model (job/task) first */
+ public void start(Configuration allConf) {
+
+        // Bind column conversion settings
+ ColumnCast.bind(allConf);
+
+ /**
+         * Initialize the PluginLoader so plugin configurations can be looked up
+ */
+ LoadUtil.bind(allConf);
+
+ boolean isJob = !("taskGroup".equalsIgnoreCase(allConf
+ .getString(CoreConstant.DATAX_CORE_CONTAINER_MODEL)));
+        // JobContainer sets and adjusts this value again after schedule()
+        int channelNumber = 0;
+ AbstractContainer container;
+ long instanceId;
+ int taskGroupId = -1;
+ if (isJob) {
+ allConf.set(CoreConstant.DATAX_CORE_CONTAINER_JOB_MODE, RUNTIME_MODE);
+ container = new JobContainer(allConf);
+ instanceId = allConf.getLong(
+ CoreConstant.DATAX_CORE_CONTAINER_JOB_ID, 0);
+
+ } else {
+ container = new TaskGroupContainer(allConf);
+ instanceId = allConf.getLong(
+ CoreConstant.DATAX_CORE_CONTAINER_JOB_ID);
+ taskGroupId = allConf.getInt(
+ CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID);
+ channelNumber = allConf.getInt(
+ CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL);
+ }
+
+        // perfTrace is enabled by default
+ boolean traceEnable = allConf.getBool(CoreConstant.DATAX_CORE_CONTAINER_TRACE_ENABLE, true);
+ boolean perfReportEnable = allConf.getBool(CoreConstant.DATAX_CORE_REPORT_DATAX_PERFLOG, true);
+
+        // DataX shell jobs in standalone mode do not report
+ if(instanceId == -1){
+ perfReportEnable = false;
+ }
+
+ int priority = 0;
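+        // Priority comes from the SKYNET_PRIORITY environment variable when the scheduling
+        // environment supplies one; otherwise it stays at 0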
+ try {
+ priority = Integer.parseInt(System.getenv("SKYNET_PRIORITY"));
+        } catch (NumberFormatException e) {
+            LOG.warn("priority set to 0 because of NumberFormatException, the value is: " + System.getenv("SKYNET_PRIORITY"));
+ }
+
+ Configuration jobInfoConfig = allConf.getConfiguration(CoreConstant.DATAX_JOB_JOBINFO);
+        // Initialize PerfTrace
+        PerfTrace perfTrace = PerfTrace.getInstance(isJob, instanceId, taskGroupId, priority, traceEnable);
+        perfTrace.setJobInfo(jobInfoConfig, perfReportEnable, channelNumber);
+ container.start();
+
+ }
+
+
+    // Note: mask sensitive information before logging
+ public static String filterJobConfiguration(final Configuration configuration) {
+ Configuration jobConfWithSetting = configuration.getConfiguration("job").clone();
+
+ Configuration jobContent = jobConfWithSetting.getConfiguration("content");
+
+ filterSensitiveConfiguration(jobContent);
+
+ jobConfWithSetting.set("content",jobContent);
+
+ return jobConfWithSetting.beautify();
+ }
+
+ public static Configuration filterSensitiveConfiguration(Configuration configuration){
+        Set<String> keys = configuration.getKeys();
+ for (final String key : keys) {
+ boolean isSensitive = StringUtils.endsWithIgnoreCase(key, "password")
+ || StringUtils.endsWithIgnoreCase(key, "accessKey");
+ if (isSensitive && configuration.get(key) instanceof String) {
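+                // replaceAll(".", "*") masks every character, e.g. "p@ssw0rd" becomes "********"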
+ configuration.set(key, configuration.getString(key).replaceAll(".", "*"));
+ }
+ }
+ return configuration;
+ }
+
+ public static void entry(final String[] args) throws Throwable {
+ Options options = new Options();
+ options.addOption("job", true, "Job config.");
+ options.addOption("jobid", true, "Job unique id.");
+ options.addOption("mode", true, "Job runtime mode.");
+
+ BasicParser parser = new BasicParser();
+ CommandLine cl = parser.parse(options, args);
+
+ String jobPath = cl.getOptionValue("job");
+
+        // If the user does not explicitly specify a jobid, datax.py passes a default jobid of -1
+ String jobIdString = cl.getOptionValue("jobid");
+ RUNTIME_MODE = cl.getOptionValue("mode");
+
+ Configuration configuration = ConfigParser.parse(jobPath);
+
+ long jobId;
+ if (!"-1".equalsIgnoreCase(jobIdString)) {
+ jobId = Long.parseLong(jobIdString);
+ } else {
+ // only for dsc & ds & datax 3 update
+ String dscJobUrlPatternString = "/instance/(\\d{1,})/config.xml";
+ String dsJobUrlPatternString = "/inner/job/(\\d{1,})/config";
+ String dsTaskGroupUrlPatternString = "/inner/job/(\\d{1,})/taskGroup/";
+            List<String> patternStringList = Arrays.asList(dscJobUrlPatternString,
+ dsJobUrlPatternString, dsTaskGroupUrlPatternString);
+ jobId = parseJobIdFromUrl(patternStringList, jobPath);
+ }
+
+ boolean isStandAloneMode = "standalone".equalsIgnoreCase(RUNTIME_MODE);
+ if (!isStandAloneMode && jobId == -1) {
+            // Outside standalone mode, jobId must never be -1
+            throw DataXException.asDataXException(FrameworkErrorCode.CONFIG_ERROR, "A valid jobId must be provided in the URL when not running in standalone mode.");
+ }
+ configuration.set(CoreConstant.DATAX_CORE_CONTAINER_JOB_ID, jobId);
+
+        // Print VM info
+ VMInfo vmInfo = VMInfo.getVmInfo();
+ if (vmInfo != null) {
+ LOG.info(vmInfo.toString());
+ }
+
+ LOG.info("\n" + Engine.filterJobConfiguration(configuration) + "\n");
+
+ LOG.debug(configuration.toJSON());
+
+ ConfigurationValidate.doValidate(configuration);
+ Engine engine = new Engine();
+ engine.start(configuration);
+ }
+
+
+ /**
+     * Returns -1 when no jobId can be parsed from the url.
+ *
+ * only for dsc & ds & datax 3 update
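+     * e.g. a url such as "/inner/job/12345/config" yields jobId 12345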
+ */
+    private static long parseJobIdFromUrl(List<String> patternStringList, String url) {
+ long result = -1;
+ for (String patternString : patternStringList) {
+ result = doParseJobIdFromUrl(patternString, url);
+ if (result != -1) {
+ return result;
+ }
+ }
+ return result;
+ }
+
+ private static long doParseJobIdFromUrl(String patternString, String url) {
+ Pattern pattern = Pattern.compile(patternString);
+ Matcher matcher = pattern.matcher(url);
+ if (matcher.find()) {
+ return Long.parseLong(matcher.group(1));
+ }
+
+ return -1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = 0;
+ try {
+ Engine.entry(args);
+ } catch (Throwable e) {
+ exitCode = 1;
+ LOG.error("\n\n经DataX智能分析,该任务最可能的错误原因是:\n" + ExceptionTracker.trace(e));
+
+ if (e instanceof DataXException) {
+ DataXException tempException = (DataXException) e;
+ ErrorCode errorCode = tempException.getErrorCode();
+ if (errorCode instanceof FrameworkErrorCode) {
+ FrameworkErrorCode tempErrorCode = (FrameworkErrorCode) errorCode;
+ exitCode = tempErrorCode.toExitValue();
+ }
+ }
+
+ System.exit(exitCode);
+ }
+ System.exit(exitCode);
+ }
+
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/container/util/HookInvoker.java b/core/src/main/java/com/alibaba/datax/core/container/util/HookInvoker.java
new file mode 100755
index 0000000000..6e0ef17825
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/container/util/HookInvoker.java
@@ -0,0 +1,91 @@
+package com.alibaba.datax.core.container.util;
+
+/**
+ * Created by xiafei.qiuxf on 14/12/17.
+ */
+
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.spi.Hook;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.util.FrameworkErrorCode;
+import com.alibaba.datax.core.util.container.JarLoader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.ServiceLoader;
+
+/**
+ * Scans all first-level sub-directories of the given directory, treating each sub-directory as one Hook directory.
+ * Each sub-directory must follow the standard ServiceLoader layout, see http://docs.oracle.com/javase/6/docs/api/java/util/ServiceLoader.html.
+ * The jars inside are loaded and invoked through the ServiceLoader mechanism.
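+ * e.g. hook/myhook/myhook.jar containing a META-INF/services/com.alibaba.datax.common.spi.Hook entry (illustrative layout)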
+ */
+public class HookInvoker {
+
+ private static final Logger LOG = LoggerFactory.getLogger(HookInvoker.class);
+    private final Map<String, Number> msg;
+ private final Configuration conf;
+
+ private File baseDir;
+
+    public HookInvoker(String baseDirName, Configuration conf, Map<String, Number> msg) {
+ this.baseDir = new File(baseDirName);
+ this.conf = conf;
+ this.msg = msg;
+ }
+
+ public void invokeAll() {
+ if (!baseDir.exists() || baseDir.isFile()) {
+ LOG.info("No hook invoked, because base dir not exists or is a file: " + baseDir.getAbsolutePath());
+ return;
+ }
+
+ String[] subDirs = baseDir.list(new FilenameFilter() {
+ @Override
+ public boolean accept(File dir, String name) {
+ return new File(dir, name).isDirectory();
+ }
+ });
+
+ if (subDirs == null) {
+            throw DataXException.asDataXException(FrameworkErrorCode.HOOK_LOAD_ERROR, "Listing hook sub-directories returned null");
+ }
+
+ for (String subDir : subDirs) {
+ doInvoke(new File(baseDir, subDir).getAbsolutePath());
+ }
+
+ }
+
+ private void doInvoke(String path) {
+ ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader();
+ try {
+ JarLoader jarLoader = new JarLoader(new String[]{path});
+ Thread.currentThread().setContextClassLoader(jarLoader);
+            Iterator<Hook> hookIt = ServiceLoader.load(Hook.class).iterator();
+ if (!hookIt.hasNext()) {
+ LOG.warn("No hook defined under path: " + path);
+ } else {
+ Hook hook = hookIt.next();
+ LOG.info("Invoke hook [{}], path: {}", hook.getName(), path);
+ hook.invoke(conf, msg);
+ }
+ } catch (Exception e) {
+ LOG.error("Exception when invoke hook", e);
+ throw DataXException.asDataXException(
+ CommonErrorCode.HOOK_INTERNAL_ERROR, "Exception when invoke hook", e);
+ } finally {
+ Thread.currentThread().setContextClassLoader(oldClassLoader);
+ }
+ }
+
+ public static void main(String[] args) {
+ new HookInvoker("/Users/xiafei/workspace/datax3/target/datax/datax/hook",
+                null, new HashMap<String, Number>()).invokeAll();
+ }
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java b/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java
new file mode 100755
index 0000000000..31ba60a4dd
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java
@@ -0,0 +1,177 @@
+package com.alibaba.datax.core.container.util;
+
+import com.alibaba.datax.common.constant.CommonConstant;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.util.container.CoreConstant;
+import org.apache.commons.lang.Validate;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.*;
+
+public final class JobAssignUtil {
+ private JobAssignUtil() {
+ }
+
+ /**
+     * Assigns tasks to their taskGroups fairly.
+     * Fairness means the load-balance resource marks carried by the tasks are taken into account, producing a more even assignment.
+     * TODO document this with a concrete example
+ */
+    public static List<Configuration> assignFairly(Configuration configuration, int channelNumber, int channelsPerTaskGroup) {
+        Validate.isTrue(configuration != null, "The job configuration received by the framework must not be null.");
+
+        List<Configuration> contentConfig = configuration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
+        Validate.isTrue(contentConfig.size() > 0, "The split job received by the framework has no content.");
+
+        Validate.isTrue(channelNumber > 0 && channelsPerTaskGroup > 0,
+                "channelNumber and channelsPerTaskGroup must both be positive.");
+
+ int taskGroupNumber = (int) Math.ceil(1.0 * channelNumber / channelsPerTaskGroup);
+
+ Configuration aTaskConfig = contentConfig.get(0);
+
+ String readerResourceMark = aTaskConfig.getString(CoreConstant.JOB_READER_PARAMETER + "." +
+ CommonConstant.LOAD_BALANCE_RESOURCE_MARK);
+ String writerResourceMark = aTaskConfig.getString(CoreConstant.JOB_WRITER_PARAMETER + "." +
+ CommonConstant.LOAD_BALANCE_RESOURCE_MARK);
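+        // A resource mark typically identifies the underlying physical resource (e.g. one database
+        // instance), so tasks sharing a resource can be spread across taskGroups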
+
+ boolean hasLoadBalanceResourceMark = StringUtils.isNotBlank(readerResourceMark) ||
+ StringUtils.isNotBlank(writerResourceMark);
+
+ if (!hasLoadBalanceResourceMark) {
+            // Fake a fixed key as the resource mark (either reader or writer would do; the reader is used here)
+ for (Configuration conf : contentConfig) {
+ conf.set(CoreConstant.JOB_READER_PARAMETER + "." +
+ CommonConstant.LOAD_BALANCE_RESOURCE_MARK, "aFakeResourceMarkForLoadBalance");
+ }
+            // Shuffle once so that plugins that set no resource mark do not end up with a skewed order
+ Collections.shuffle(contentConfig, new Random(System.currentTimeMillis()));
+ }
+
+        LinkedHashMap<String, List<Integer>> resourceMarkAndTaskIdMap = parseAndGetResourceMarkAndTaskIdMap(contentConfig);
+        List<Configuration> taskGroupConfig = doAssign(resourceMarkAndTaskIdMap, configuration, taskGroupNumber);
+
+        // Adjust the channel count of each taskGroup (an optimization)
+ adjustChannelNumPerTaskGroup(taskGroupConfig, channelNumber);
+ return taskGroupConfig;
+ }
+
+    private static void adjustChannelNumPerTaskGroup(List<Configuration> taskGroupConfig, int channelNumber) {
+ int taskGroupNumber = taskGroupConfig.size();
+ int avgChannelsPerTaskGroup = channelNumber / taskGroupNumber;
+ int remainderChannelCount = channelNumber % taskGroupNumber;
+        // remainderChannelCount taskGroups get avgChannelsPerTaskGroup + 1 channels each;
+        // the other (taskGroupNumber - remainderChannelCount) taskGroups get avgChannelsPerTaskGroup channels each
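+        // e.g. channelNumber = 10, taskGroupNumber = 4: avg = 2, remainder = 2, so the four
+        // groups get 3, 3, 2 and 2 channels respectively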
+
+ int i = 0;
+ for (; i < remainderChannelCount; i++) {
+ taskGroupConfig.get(i).set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL, avgChannelsPerTaskGroup + 1);
+ }
+
+ for (int j = 0; j < taskGroupNumber - remainderChannelCount; j++) {
+ taskGroupConfig.get(i + j).set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL, avgChannelsPerTaskGroup);
+ }
+ }
+
+ /**
+     * From the task configurations, builds the mapping:
+     * resource mark --> taskId (List)
+     */
+    private static LinkedHashMap<String, List<Integer>> parseAndGetResourceMarkAndTaskIdMap(List<Configuration> contentConfig) {
+ // key: resourceMark, value: taskId
+        LinkedHashMap<String, List<Integer>> readerResourceMarkAndTaskIdMap = new LinkedHashMap<String, List<Integer>>();
+        LinkedHashMap<String, List<Integer>> writerResourceMarkAndTaskIdMap = new LinkedHashMap<String, List<Integer>>();
+
+ for (Configuration aTaskConfig : contentConfig) {
+ int taskId = aTaskConfig.getInt(CoreConstant.TASK_ID);
+            // Add readerResourceMark to readerResourceMarkAndTaskIdMap
+ String readerResourceMark = aTaskConfig.getString(CoreConstant.JOB_READER_PARAMETER + "." + CommonConstant.LOAD_BALANCE_RESOURCE_MARK);
+ if (readerResourceMarkAndTaskIdMap.get(readerResourceMark) == null) {
+                readerResourceMarkAndTaskIdMap.put(readerResourceMark, new LinkedList<Integer>());
+ }
+ readerResourceMarkAndTaskIdMap.get(readerResourceMark).add(taskId);
+
+            // Add writerResourceMark to writerResourceMarkAndTaskIdMap
+ String writerResourceMark = aTaskConfig.getString(CoreConstant.JOB_WRITER_PARAMETER + "." + CommonConstant.LOAD_BALANCE_RESOURCE_MARK);
+ if (writerResourceMarkAndTaskIdMap.get(writerResourceMark) == null) {
+                writerResourceMarkAndTaskIdMap.put(writerResourceMark, new LinkedList<Integer>());
+ }
+ writerResourceMarkAndTaskIdMap.get(writerResourceMark).add(taskId);
+ }
+
+ if (readerResourceMarkAndTaskIdMap.size() >= writerResourceMarkAndTaskIdMap.size()) {
+            // Use the reader's resource marks for the shuffle
+ return readerResourceMarkAndTaskIdMap;
+ } else {
+            // Use the writer's resource marks for the shuffle
+ return writerResourceMarkAndTaskIdMap;
+ }
+ }
+
+
+ /**
+     * The desired effect, by example:
+     *
+     * database a has tables: 0, 1, 2
+     * database b has tables: 3, 4
+     * database c has tables: 5, 6, 7
+     *
+     * with 4 taskGroups,
+     * the assigned result is:
+     * taskGroup-0: 0, 4,
+     * taskGroup-1: 3, 6,
+     * taskGroup-2: 5, 2,
+     * taskGroup-3: 1, 7
+     *
+     */
+    private static List<Configuration> doAssign(LinkedHashMap<String, List<Integer>> resourceMarkAndTaskIdMap, Configuration jobConfiguration, int taskGroupNumber) {
+        List<Configuration> contentConfig = jobConfiguration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
+
+ Configuration taskGroupTemplate = jobConfiguration.clone();
+ taskGroupTemplate.remove(CoreConstant.DATAX_JOB_CONTENT);
+
+        List<Configuration> result = new LinkedList<Configuration>();
+
+        List<List<Configuration>> taskGroupConfigList = new ArrayList<List<Configuration>>(taskGroupNumber);
+        for (int i = 0; i < taskGroupNumber; i++) {
+            taskGroupConfigList.add(new LinkedList<Configuration>());
+ }
+
+ int mapValueMaxLength = -1;
+
+        List<String> resourceMarks = new ArrayList<String>();
+        for (Map.Entry<String, List<Integer>> entry : resourceMarkAndTaskIdMap.entrySet()) {
+ resourceMarks.add(entry.getKey());
+ if (entry.getValue().size() > mapValueMaxLength) {
+ mapValueMaxLength = entry.getValue().size();
+ }
+ }
+
+ int taskGroupIndex = 0;
+ for (int i = 0; i < mapValueMaxLength; i++) {
+ for (String resourceMark : resourceMarks) {
+ if (resourceMarkAndTaskIdMap.get(resourceMark).size() > 0) {
+ int taskId = resourceMarkAndTaskIdMap.get(resourceMark).get(0);
+ taskGroupConfigList.get(taskGroupIndex % taskGroupNumber).add(contentConfig.get(taskId));
+ taskGroupIndex++;
+
+ resourceMarkAndTaskIdMap.get(resourceMark).remove(0);
+ }
+ }
+ }
+
+ Configuration tempTaskGroupConfig;
+ for (int i = 0; i < taskGroupNumber; i++) {
+ tempTaskGroupConfig = taskGroupTemplate.clone();
+ tempTaskGroupConfig.set(CoreConstant.DATAX_JOB_CONTENT, taskGroupConfigList.get(i));
+ tempTaskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID, i);
+
+ result.add(tempTaskGroupConfig);
+ }
+
+ return result;
+ }
+
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
new file mode 100755
index 0000000000..50f1cf7b8d
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
@@ -0,0 +1,976 @@
+package com.alibaba.datax.core.job;
+
+import com.alibaba.datax.common.constant.PluginType;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.AbstractJobPlugin;
+import com.alibaba.datax.common.plugin.JobPluginCollector;
+import com.alibaba.datax.common.spi.Reader;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.statistics.PerfTrace;
+import com.alibaba.datax.common.statistics.VMInfo;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.StrUtil;
+import com.alibaba.datax.core.AbstractContainer;
+import com.alibaba.datax.core.Engine;
+import com.alibaba.datax.core.container.util.HookInvoker;
+import com.alibaba.datax.core.container.util.JobAssignUtil;
+import com.alibaba.datax.core.job.scheduler.AbstractScheduler;
+import com.alibaba.datax.core.job.scheduler.processinner.StandAloneScheduler;
+import com.alibaba.datax.core.statistics.communication.Communication;
+import com.alibaba.datax.core.statistics.communication.CommunicationTool;
+import com.alibaba.datax.core.statistics.container.communicator.AbstractContainerCommunicator;
+import com.alibaba.datax.core.statistics.container.communicator.job.StandAloneJobContainerCommunicator;
+import com.alibaba.datax.core.statistics.plugin.DefaultJobPluginCollector;
+import com.alibaba.datax.core.util.ErrorRecordChecker;
+import com.alibaba.datax.core.util.FrameworkErrorCode;
+import com.alibaba.datax.core.util.container.ClassLoaderSwapper;
+import com.alibaba.datax.core.util.container.CoreConstant;
+import com.alibaba.datax.core.util.container.LoadUtil;
+import com.alibaba.datax.dataxservice.face.domain.enums.ExecuteMode;
+import com.alibaba.fastjson.JSON;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.Validate;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Created by jingxing on 14-8-24.
+ *
+ * A job instance runs inside the JobContainer. It is the master of all tasks, responsible for
+ * initialization, splitting, scheduling, running, recycling, monitoring and reporting,
+ * but it performs no actual data synchronization itself.
+ */
+public class JobContainer extends AbstractContainer {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(JobContainer.class);
+
+ private static final SimpleDateFormat dateFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+
+ private ClassLoaderSwapper classLoaderSwapper = ClassLoaderSwapper
+ .newCurrentThreadClassLoaderSwapper();
+
+ private long jobId;
+
+ private String readerPluginName;
+
+ private String writerPluginName;
+
+ /**
+     * Job-side instances of the reader and writer plugins
+ */
+ private Reader.Job jobReader;
+
+ private Writer.Job jobWriter;
+
+ private Configuration userConf;
+
+ private long startTimeStamp;
+
+ private long endTimeStamp;
+
+ private long startTransferTimeStamp;
+
+ private long endTransferTimeStamp;
+
+ private int needChannelNumber;
+
+ private int totalStage = 1;
+
+ private ErrorRecordChecker errorLimit;
+
+ public JobContainer(Configuration configuration) {
+ super(configuration);
+
+ errorLimit = new ErrorRecordChecker(configuration);
+ }
+
+ /**
+     * All of the JobContainer's work happens inside start(): init, prepare, split, schedule,
+     * post, plus destroy and statistics
+ */
+ @Override
+ public void start() {
+ LOG.info("DataX jobContainer starts job.");
+
+ boolean hasException = false;
+ boolean isDryRun = false;
+ try {
+ this.startTimeStamp = System.currentTimeMillis();
+ isDryRun = configuration.getBool(CoreConstant.DATAX_JOB_SETTING_DRYRUN, false);
+ if(isDryRun) {
+ LOG.info("jobContainer starts to do preCheck ...");
+ this.preCheck();
+ } else {
+ userConf = configuration.clone();
+ LOG.debug("jobContainer starts to do preHandle ...");
+ this.preHandle();
+
+ LOG.debug("jobContainer starts to do init ...");
+ this.init();
+ LOG.info("jobContainer starts to do prepare ...");
+ this.prepare();
+ LOG.info("jobContainer starts to do split ...");
+ this.totalStage = this.split();
+ LOG.info("jobContainer starts to do schedule ...");
+ this.schedule();
+ LOG.debug("jobContainer starts to do post ...");
+ this.post();
+
+ LOG.debug("jobContainer starts to do postHandle ...");
+ this.postHandle();
+ LOG.info("DataX jobId [{}] completed successfully.", this.jobId);
+
+ this.invokeHooks();
+ }
+ } catch (Throwable e) {
+ LOG.error("Exception when job run", e);
+
+ hasException = true;
+
+ if (e instanceof OutOfMemoryError) {
+ this.destroy();
+ System.gc();
+ }
+
+
+ if (super.getContainerCommunicator() == null) {
+                // containerCollector is initialized in schedule(), so when an exception occurs before schedule() it must be initialized here
+
+ AbstractContainerCommunicator tempContainerCollector;
+ // standalone
+ tempContainerCollector = new StandAloneJobContainerCommunicator(configuration);
+
+ super.setContainerCommunicator(tempContainerCollector);
+ }
+
+ Communication communication = super.getContainerCommunicator().collect();
+            // The state before reporting; no need to set it manually
+ // communication.setState(State.FAILED);
+ communication.setThrowable(e);
+ communication.setTimestamp(this.endTimeStamp);
+
+ Communication tempComm = new Communication();
+ tempComm.setTimestamp(this.startTransferTimeStamp);
+
+ Communication reportCommunication = CommunicationTool.getReportCommunication(communication, tempComm, this.totalStage);
+ super.getContainerCommunicator().report(reportCommunication);
+
+ throw DataXException.asDataXException(
+ FrameworkErrorCode.RUNTIME_ERROR, e);
+ } finally {
+ if(!isDryRun) {
+
+ this.destroy();
+ this.endTimeStamp = System.currentTimeMillis();
+ if (!hasException) {
+                    // Finally, print average CPU usage and GC statistics
+ VMInfo vmInfo = VMInfo.getVmInfo();
+ if (vmInfo != null) {
+ vmInfo.getDelta(false);
+ LOG.info(vmInfo.totalString());
+ }
+
+ LOG.info(PerfTrace.getInstance().summarizeNoException());
+ this.logStatistics();
+ }
+ }
+ }
+ }
+
+ private void preCheck() {
+ this.preCheckInit();
+ this.adjustChannelNumber();
+
+ if (this.needChannelNumber <= 0) {
+ this.needChannelNumber = 1;
+ }
+ this.preCheckReader();
+ this.preCheckWriter();
+ LOG.info("PreCheck通过");
+ }
+
+ private void preCheckInit() {
+ this.jobId = this.configuration.getLong(
+ CoreConstant.DATAX_CORE_CONTAINER_JOB_ID, -1);
+
+ if (this.jobId < 0) {
+ LOG.info("Set jobId = 0");
+ this.jobId = 0;
+ this.configuration.set(CoreConstant.DATAX_CORE_CONTAINER_JOB_ID,
+ this.jobId);
+ }
+
+ Thread.currentThread().setName("job-" + this.jobId);
+
+ JobPluginCollector jobPluginCollector = new DefaultJobPluginCollector(
+ this.getContainerCommunicator());
+ this.jobReader = this.preCheckReaderInit(jobPluginCollector);
+ this.jobWriter = this.preCheckWriterInit(jobPluginCollector);
+ }
+
+ private Reader.Job preCheckReaderInit(JobPluginCollector jobPluginCollector) {
+ this.readerPluginName = this.configuration.getString(
+ CoreConstant.DATAX_JOB_CONTENT_READER_NAME);
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.READER, this.readerPluginName));
+
+ Reader.Job jobReader = (Reader.Job) LoadUtil.loadJobPlugin(
+ PluginType.READER, this.readerPluginName);
+
+ this.configuration.set(CoreConstant.DATAX_JOB_CONTENT_READER_PARAMETER + ".dryRun", true);
+
+        // Set the reader's job config
+ jobReader.setPluginJobConf(this.configuration.getConfiguration(
+ CoreConstant.DATAX_JOB_CONTENT_READER_PARAMETER));
+        // Set the reader's peer plugin config
+ jobReader.setPeerPluginJobConf(this.configuration.getConfiguration(
+ CoreConstant.DATAX_JOB_CONTENT_READER_PARAMETER));
+
+ jobReader.setJobPluginCollector(jobPluginCollector);
+
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ return jobReader;
+ }
+
+
+ private Writer.Job preCheckWriterInit(JobPluginCollector jobPluginCollector) {
+ this.writerPluginName = this.configuration.getString(
+ CoreConstant.DATAX_JOB_CONTENT_WRITER_NAME);
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.WRITER, this.writerPluginName));
+
+ Writer.Job jobWriter = (Writer.Job) LoadUtil.loadJobPlugin(
+ PluginType.WRITER, this.writerPluginName);
+
+ this.configuration.set(CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER + ".dryRun", true);
+
+        // Set the writer's job config
+ jobWriter.setPluginJobConf(this.configuration.getConfiguration(
+ CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER));
+        // Set the writer's peer (reader) config
+ jobWriter.setPeerPluginJobConf(this.configuration.getConfiguration(
+ CoreConstant.DATAX_JOB_CONTENT_READER_PARAMETER));
+
+ jobWriter.setPeerPluginName(this.readerPluginName);
+ jobWriter.setJobPluginCollector(jobPluginCollector);
+
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+
+ return jobWriter;
+ }
+
+ private void preCheckReader() {
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.READER, this.readerPluginName));
+ LOG.info(String.format("DataX Reader.Job [%s] do preCheck work .",
+ this.readerPluginName));
+ this.jobReader.preCheck();
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ }
+
+ private void preCheckWriter() {
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.WRITER, this.writerPluginName));
+ LOG.info(String.format("DataX Writer.Job [%s] do preCheck work .",
+ this.writerPluginName));
+ this.jobWriter.preCheck();
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ }
+
+ /**
+     * Initialize the reader and writer
+ */
+ private void init() {
+ this.jobId = this.configuration.getLong(
+ CoreConstant.DATAX_CORE_CONTAINER_JOB_ID, -1);
+
+ if (this.jobId < 0) {
+ LOG.info("Set jobId = 0");
+ this.jobId = 0;
+ this.configuration.set(CoreConstant.DATAX_CORE_CONTAINER_JOB_ID,
+ this.jobId);
+ }
+
+ Thread.currentThread().setName("job-" + this.jobId);
+
+ JobPluginCollector jobPluginCollector = new DefaultJobPluginCollector(
+ this.getContainerCommunicator());
+        // Reader must be initialized first, then Writer
+ this.jobReader = this.initJobReader(jobPluginCollector);
+ this.jobWriter = this.initJobWriter(jobPluginCollector);
+ }
+
+ private void prepare() {
+ this.prepareJobReader();
+ this.prepareJobWriter();
+ }
+
+ private void preHandle() {
+ String handlerPluginTypeStr = this.configuration.getString(
+ CoreConstant.DATAX_JOB_PREHANDLER_PLUGINTYPE);
+        if (StringUtils.isEmpty(handlerPluginTypeStr)) {
+ return;
+ }
+ PluginType handlerPluginType;
+ try {
+ handlerPluginType = PluginType.valueOf(handlerPluginTypeStr.toUpperCase());
+ } catch (IllegalArgumentException e) {
+ throw DataXException.asDataXException(
+ FrameworkErrorCode.CONFIG_ERROR,
+ String.format("Job preHandler's pluginType(%s) set error, reason(%s)", handlerPluginTypeStr.toUpperCase(), e.getMessage()));
+ }
+
+ String handlerPluginName = this.configuration.getString(
+ CoreConstant.DATAX_JOB_PREHANDLER_PLUGINNAME);
+
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ handlerPluginType, handlerPluginName));
+
+ AbstractJobPlugin handler = LoadUtil.loadJobPlugin(
+ handlerPluginType, handlerPluginName);
+
+ JobPluginCollector jobPluginCollector = new DefaultJobPluginCollector(
+ this.getContainerCommunicator());
+ handler.setJobPluginCollector(jobPluginCollector);
+
+        // TODO: the security of the configuration must be guaranteed in the future
+ handler.preHandler(configuration);
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+
+ LOG.info("After PreHandler: \n" + Engine.filterJobConfiguration(configuration) + "\n");
+ }
+
+ private void postHandle() {
+ String handlerPluginTypeStr = this.configuration.getString(
+ CoreConstant.DATAX_JOB_POSTHANDLER_PLUGINTYPE);
+
+        if (StringUtils.isEmpty(handlerPluginTypeStr)) {
+ return;
+ }
+ PluginType handlerPluginType;
+ try {
+ handlerPluginType = PluginType.valueOf(handlerPluginTypeStr.toUpperCase());
+ } catch (IllegalArgumentException e) {
+ throw DataXException.asDataXException(
+ FrameworkErrorCode.CONFIG_ERROR,
+ String.format("Job postHandler's pluginType(%s) set error, reason(%s)", handlerPluginTypeStr.toUpperCase(), e.getMessage()));
+ }
+
+ String handlerPluginName = this.configuration.getString(
+ CoreConstant.DATAX_JOB_POSTHANDLER_PLUGINNAME);
+
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ handlerPluginType, handlerPluginName));
+
+ AbstractJobPlugin handler = LoadUtil.loadJobPlugin(
+ handlerPluginType, handlerPluginName);
+
+ JobPluginCollector jobPluginCollector = new DefaultJobPluginCollector(
+ this.getContainerCommunicator());
+ handler.setJobPluginCollector(jobPluginCollector);
+
+ handler.postHandler(configuration);
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ }
+
+
+ /**
+     * Performs the finest-grained split of the reader and writer. Note that the writer's split
+     * must follow the reader's so that the task counts are equal, satisfying the 1:1 channel model.
+     * The reader and writer configs are therefore merged, and the merged result is shuffled to
+     * avoid long-tail effects that a fixed order would impose on both ends.
+ */
+ private int split() {
+ this.adjustChannelNumber();
+
+ if (this.needChannelNumber <= 0) {
+ this.needChannelNumber = 1;
+ }
+
+        List<Configuration> readerTaskConfigs = this
+ .doReaderSplit(this.needChannelNumber);
+ int taskNumber = readerTaskConfigs.size();
+        List<Configuration> writerTaskConfigs = this
+ .doWriterSplit(taskNumber);
+
+        List<Configuration> transformerList = this.configuration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT_TRANSFORMER);
+
+ LOG.debug("transformer configuration: "+ JSON.toJSONString(transformerList));
+ /**
+         * Input: the reader and writer parameter lists; output: the list of elements under content
+ */
+        List<Configuration> contentConfig = mergeReaderAndWriterTaskConfigs(
+ readerTaskConfigs, writerTaskConfigs, transformerList);
+
+
+ LOG.debug("contentConfig configuration: "+ JSON.toJSONString(contentConfig));
+
+ this.configuration.set(CoreConstant.DATAX_JOB_CONTENT, contentConfig);
+
+ return contentConfig.size();
+ }
+
+ private void adjustChannelNumber() {
+ int needChannelNumberByByte = Integer.MAX_VALUE;
+ int needChannelNumberByRecord = Integer.MAX_VALUE;
+
+ boolean isByteLimit = (this.configuration.getInt(
+ CoreConstant.DATAX_JOB_SETTING_SPEED_BYTE, 0) > 0);
+ if (isByteLimit) {
+ long globalLimitedByteSpeed = this.configuration.getInt(
+ CoreConstant.DATAX_JOB_SETTING_SPEED_BYTE, 10 * 1024 * 1024);
+
+            // With byte-rate limiting, the per-channel byte limit must be set, otherwise raise an error!
+            Long channelLimitedByteSpeed = this.configuration
+                    .getLong(CoreConstant.DATAX_CORE_TRANSPORT_CHANNEL_SPEED_BYTE);
+            if (channelLimitedByteSpeed == null || channelLimitedByteSpeed <= 0) {
+                throw DataXException.asDataXException(
+                        FrameworkErrorCode.CONFIG_ERROR,
+                        "When a total bps limit is set, the per-channel bps value must be present and positive.");
+ }
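+            // e.g. a 10MB/s global byte limit with a 1MB/s per-channel limit yields 10 channels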
+
+ needChannelNumberByByte =
+ (int) (globalLimitedByteSpeed / channelLimitedByteSpeed);
+ needChannelNumberByByte =
+ needChannelNumberByByte > 0 ? needChannelNumberByByte : 1;
+ LOG.info("Job set Max-Byte-Speed to " + globalLimitedByteSpeed + " bytes.");
+ }
+
+ boolean isRecordLimit = (this.configuration.getInt(
+ CoreConstant.DATAX_JOB_SETTING_SPEED_RECORD, 0)) > 0;
+ if (isRecordLimit) {
+ long globalLimitedRecordSpeed = this.configuration.getInt(
+ CoreConstant.DATAX_JOB_SETTING_SPEED_RECORD, 100000);
+
+ Long channelLimitedRecordSpeed = this.configuration.getLong(
+ CoreConstant.DATAX_CORE_TRANSPORT_CHANNEL_SPEED_RECORD);
+ if (channelLimitedRecordSpeed == null || channelLimitedRecordSpeed <= 0) {
+                throw DataXException.asDataXException(FrameworkErrorCode.CONFIG_ERROR,
+                        "When a total tps limit is set, the per-channel tps value must be present and positive.");
+ }
+
+ needChannelNumberByRecord =
+ (int) (globalLimitedRecordSpeed / channelLimitedRecordSpeed);
+ needChannelNumberByRecord =
+ needChannelNumberByRecord > 0 ? needChannelNumberByRecord : 1;
+ LOG.info("Job set Max-Record-Speed to " + globalLimitedRecordSpeed + " records.");
+ }
+
+        // Take the smaller of the two
+ this.needChannelNumber = needChannelNumberByByte < needChannelNumberByRecord ?
+ needChannelNumberByByte : needChannelNumberByRecord;
+
+        // If needChannelNumber was derived from the byte or record limits, return here
+ if (this.needChannelNumber < Integer.MAX_VALUE) {
+ return;
+ }
+
+ boolean isChannelLimit = (this.configuration.getInt(
+ CoreConstant.DATAX_JOB_SETTING_SPEED_CHANNEL, 0) > 0);
+ if (isChannelLimit) {
+ this.needChannelNumber = this.configuration.getInt(
+ CoreConstant.DATAX_JOB_SETTING_SPEED_CHANNEL);
+
+ LOG.info("Job set Channel-Number to " + this.needChannelNumber
+ + " channels.");
+
+ return;
+ }
+
+ throw DataXException.asDataXException(
+ FrameworkErrorCode.CONFIG_ERROR,
+ "Job运行速度必须设置");
+ }
+
+ /**
+     * schedule() first merges the reader/writer split results from the previous step into concrete
+     * taskGroupContainers; different execute modes then invoke different scheduling strategies to run all tasks
+ */
+ private void schedule() {
+ /**
+         * The global speed and per-channel speed here are configured in B/s
+ */
+ int channelsPerTaskGroup = this.configuration.getInt(
+ CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL, 5);
+ int taskNumber = this.configuration.getList(
+ CoreConstant.DATAX_JOB_CONTENT).size();
+
+ this.needChannelNumber = Math.min(this.needChannelNumber, taskNumber);
+ PerfTrace.getInstance().setChannelNumber(needChannelNumber);
+
+ /**
+         * Determine from the configuration which tasks each taskGroup should run
+ */
+
+        List<Configuration> taskGroupConfigs = JobAssignUtil.assignFairly(this.configuration,
+ this.needChannelNumber, channelsPerTaskGroup);
+
+ LOG.info("Scheduler starts [{}] taskGroups.", taskGroupConfigs.size());
+
+ ExecuteMode executeMode = null;
+ AbstractScheduler scheduler;
+ try {
+ executeMode = ExecuteMode.STANDALONE;
+ scheduler = initStandaloneScheduler(this.configuration);
+
+            // Set executeMode
+ for (Configuration taskGroupConfig : taskGroupConfigs) {
+ taskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_JOB_MODE, executeMode.getValue());
+ }
+
+ if (executeMode == ExecuteMode.LOCAL || executeMode == ExecuteMode.DISTRIBUTE) {
+ if (this.jobId <= 0) {
+ throw DataXException.asDataXException(FrameworkErrorCode.RUNTIME_ERROR,
+ "在[ local | distribute ]模式下必须设置jobId,并且其值 > 0 .");
+ }
+ }
+
+ LOG.info("Running by {} Mode.", executeMode);
+
+ this.startTransferTimeStamp = System.currentTimeMillis();
+
+ scheduler.schedule(taskGroupConfigs);
+
+ this.endTransferTimeStamp = System.currentTimeMillis();
+ } catch (Exception e) {
+ LOG.error("运行scheduler 模式[{}]出错.", executeMode);
+ this.endTransferTimeStamp = System.currentTimeMillis();
+ throw DataXException.asDataXException(
+ FrameworkErrorCode.RUNTIME_ERROR, e);
+ }
+
+ /**
+         * Check the task execution results
+ */
+ this.checkLimit();
+ }
+
+
+ private AbstractScheduler initStandaloneScheduler(Configuration configuration) {
+ AbstractContainerCommunicator containerCommunicator = new StandAloneJobContainerCommunicator(configuration);
+ super.setContainerCommunicator(containerCommunicator);
+
+ return new StandAloneScheduler(containerCommunicator);
+ }
+
+ private void post() {
+ this.postJobWriter();
+ this.postJobReader();
+ }
+
+ private void destroy() {
+ if (this.jobWriter != null) {
+ this.jobWriter.destroy();
+ this.jobWriter = null;
+ }
+ if (this.jobReader != null) {
+ this.jobReader.destroy();
+ this.jobReader = null;
+ }
+ }
+
+ private void logStatistics() {
+ long totalCosts = (this.endTimeStamp - this.startTimeStamp) / 1000;
+ long transferCosts = (this.endTransferTimeStamp - this.startTransferTimeStamp) / 1000;
+ if (0L == transferCosts) {
+ transferCosts = 1L;
+ }
+
+ if (super.getContainerCommunicator() == null) {
+ return;
+ }
+
+ Communication communication = super.getContainerCommunicator().collect();
+ communication.setTimestamp(this.endTimeStamp);
+
+ Communication tempComm = new Communication();
+ tempComm.setTimestamp(this.startTransferTimeStamp);
+
+ Communication reportCommunication = CommunicationTool.getReportCommunication(communication, tempComm, this.totalStage);
+
+        // Byte rate
+ long byteSpeedPerSecond = communication.getLongCounter(CommunicationTool.READ_SUCCEED_BYTES)
+ / transferCosts;
+
+ long recordSpeedPerSecond = communication.getLongCounter(CommunicationTool.READ_SUCCEED_RECORDS)
+ / transferCosts;
+
+ reportCommunication.setLongCounter(CommunicationTool.BYTE_SPEED, byteSpeedPerSecond);
+ reportCommunication.setLongCounter(CommunicationTool.RECORD_SPEED, recordSpeedPerSecond);
+
+ super.getContainerCommunicator().report(reportCommunication);
+
+
+ LOG.info(String.format(
+ "\n" + "%-26s: %-18s\n" + "%-26s: %-18s\n" + "%-26s: %19s\n"
+ + "%-26s: %19s\n" + "%-26s: %19s\n" + "%-26s: %19s\n"
+ + "%-26s: %19s\n",
+ "任务启动时刻",
+ dateFormat.format(startTimeStamp),
+
+ "任务结束时刻",
+ dateFormat.format(endTimeStamp),
+
+ "任务总计耗时",
+ String.valueOf(totalCosts) + "s",
+ "任务平均流量",
+ StrUtil.stringify(byteSpeedPerSecond)
+ + "/s",
+ "记录写入速度",
+ String.valueOf(recordSpeedPerSecond)
+ + "rec/s", "读出记录总数",
+ String.valueOf(CommunicationTool.getTotalReadRecords(communication)),
+ "读写失败总数",
+ String.valueOf(CommunicationTool.getTotalErrorRecords(communication))
+ ));
+
+ if (communication.getLongCounter(CommunicationTool.TRANSFORMER_SUCCEED_RECORDS) > 0
+ || communication.getLongCounter(CommunicationTool.TRANSFORMER_FAILED_RECORDS) > 0
+ || communication.getLongCounter(CommunicationTool.TRANSFORMER_FILTER_RECORDS) > 0) {
+ LOG.info(String.format(
+ "\n" + "%-26s: %19s\n" + "%-26s: %19s\n" + "%-26s: %19s\n",
+ "Transformer成功记录总数",
+ communication.getLongCounter(CommunicationTool.TRANSFORMER_SUCCEED_RECORDS),
+
+ "Transformer失败记录总数",
+ communication.getLongCounter(CommunicationTool.TRANSFORMER_FAILED_RECORDS),
+
+ "Transformer过滤记录总数",
+ communication.getLongCounter(CommunicationTool.TRANSFORMER_FILTER_RECORDS)
+ ));
+ }
+
+
+ }
+
+ /**
+     * Initializes the reader job.
+     *
+     * @return the initialized Reader.Job
+ */
+ private Reader.Job initJobReader(
+ JobPluginCollector jobPluginCollector) {
+ this.readerPluginName = this.configuration.getString(
+ CoreConstant.DATAX_JOB_CONTENT_READER_NAME);
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.READER, this.readerPluginName));
+
+ Reader.Job jobReader = (Reader.Job) LoadUtil.loadJobPlugin(
+ PluginType.READER, this.readerPluginName);
+
+        // Set the reader's job config
+ jobReader.setPluginJobConf(this.configuration.getConfiguration(
+ CoreConstant.DATAX_JOB_CONTENT_READER_PARAMETER));
+
+        // Set the reader's peer (writer) config
+ jobReader.setPeerPluginJobConf(this.configuration.getConfiguration(
+ CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER));
+
+ jobReader.setJobPluginCollector(jobPluginCollector);
+ jobReader.init();
+
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ return jobReader;
+ }
+
+ /**
+     * Initializes the writer job.
+     *
+     * @return the initialized Writer.Job
+ */
+ private Writer.Job initJobWriter(
+ JobPluginCollector jobPluginCollector) {
+ this.writerPluginName = this.configuration.getString(
+ CoreConstant.DATAX_JOB_CONTENT_WRITER_NAME);
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.WRITER, this.writerPluginName));
+
+ Writer.Job jobWriter = (Writer.Job) LoadUtil.loadJobPlugin(
+ PluginType.WRITER, this.writerPluginName);
+
+        // Set the writer's job config
+ jobWriter.setPluginJobConf(this.configuration.getConfiguration(
+ CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER));
+
+        // Set the writer's peer (reader) config
+ jobWriter.setPeerPluginJobConf(this.configuration.getConfiguration(
+ CoreConstant.DATAX_JOB_CONTENT_READER_PARAMETER));
+
+ jobWriter.setPeerPluginName(this.readerPluginName);
+ jobWriter.setJobPluginCollector(jobPluginCollector);
+ jobWriter.init();
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+
+ return jobWriter;
+ }
+
+ private void prepareJobReader() {
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.READER, this.readerPluginName));
+ LOG.info(String.format("DataX Reader.Job [%s] do prepare work .",
+ this.readerPluginName));
+ this.jobReader.prepare();
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ }
+
+ private void prepareJobWriter() {
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.WRITER, this.writerPluginName));
+ LOG.info(String.format("DataX Writer.Job [%s] do prepare work .",
+ this.writerPluginName));
+ this.jobWriter.prepare();
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ }
+
+    // TODO: handle the case where the source has no data at all
+    private List<Configuration> doReaderSplit(int adviceNumber) {
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.READER, this.readerPluginName));
+        List<Configuration> readerSlicesConfigs =
+ this.jobReader.split(adviceNumber);
+ if (readerSlicesConfigs == null || readerSlicesConfigs.size() <= 0) {
+ throw DataXException.asDataXException(
+ FrameworkErrorCode.PLUGIN_SPLIT_ERROR,
+ "reader切分的task数目不能小于等于0");
+ }
+ LOG.info("DataX Reader.Job [{}] splits to [{}] tasks.",
+ this.readerPluginName, readerSlicesConfigs.size());
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ return readerSlicesConfigs;
+ }
+
+    private List<Configuration> doWriterSplit(int readerTaskNumber) {
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.WRITER, this.writerPluginName));
+
+        List<Configuration> writerSlicesConfigs = this.jobWriter
+ .split(readerTaskNumber);
+ if (writerSlicesConfigs == null || writerSlicesConfigs.size() <= 0) {
+ throw DataXException.asDataXException(
+ FrameworkErrorCode.PLUGIN_SPLIT_ERROR,
+ "writer切分的task不能小于等于0");
+ }
+ LOG.info("DataX Writer.Job [{}] splits to [{}] tasks.",
+ this.writerPluginName, writerSlicesConfigs.size());
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+
+ return writerSlicesConfigs;
+ }
+
+ /**
+     * Merges the reader and writer configs in order; the order must not be disturbed!
+     * Input: reader- and writer-level configs. Output: the complete configuration of each task.
+     *
+     * @param readerTasksConfigs the split reader task configs
+     * @param writerTasksConfigs the split writer task configs
+     * @return the merged per-task configs
+ */
+    private List<Configuration> mergeReaderAndWriterTaskConfigs(
+            List<Configuration> readerTasksConfigs,
+            List<Configuration> writerTasksConfigs) {
+ return mergeReaderAndWriterTaskConfigs(readerTasksConfigs, writerTasksConfigs, null);
+ }
+
+    private List<Configuration> mergeReaderAndWriterTaskConfigs(
+            List<Configuration> readerTasksConfigs,
+            List<Configuration> writerTasksConfigs,
+            List<Configuration> transformerConfigs) {
+ if (readerTasksConfigs.size() != writerTasksConfigs.size()) {
+ throw DataXException.asDataXException(
+ FrameworkErrorCode.PLUGIN_SPLIT_ERROR,
+ String.format("reader切分的task数目[%d]不等于writer切分的task数目[%d].",
+ readerTasksConfigs.size(), writerTasksConfigs.size())
+ );
+ }
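+        // Reader task i pairs one-to-one with writer task i, as the 1:1 channel model requires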
+
+        List<Configuration> contentConfigs = new ArrayList<Configuration>();
+ for (int i = 0; i < readerTasksConfigs.size(); i++) {
+ Configuration taskConfig = Configuration.newDefault();
+ taskConfig.set(CoreConstant.JOB_READER_NAME,
+ this.readerPluginName);
+ taskConfig.set(CoreConstant.JOB_READER_PARAMETER,
+ readerTasksConfigs.get(i));
+ taskConfig.set(CoreConstant.JOB_WRITER_NAME,
+ this.writerPluginName);
+ taskConfig.set(CoreConstant.JOB_WRITER_PARAMETER,
+ writerTasksConfigs.get(i));
+
+            if (transformerConfigs != null && transformerConfigs.size() > 0) {
+ taskConfig.set(CoreConstant.JOB_TRANSFORMER, transformerConfigs);
+ }
+
+ taskConfig.set(CoreConstant.TASK_ID, i);
+ contentConfigs.add(taskConfig);
+ }
+
+ return contentConfigs;
+ }
+
+ /**
+     * This is fairly involved; the merge happens in two steps: 1. tasks to channels, 2. channels to taskGroups.
+     * Taken together, it folds the tasks into taskGroups while honoring the computed channel count, without starting extra channels.
+     *
+     * example:
+     *
+     * Preconditions: the split produced 1024 table shards; the user requests a total rate of 1000M/s,
+     * each channel runs at 3M/s, and each taskGroup runs 7 channels.
+     *
+     * Calculation: total channels = 1000M/s / 3M/s = 333. Distributing evenly,
+     * 308 channels carry 3 tasks each and 25 channels carry 4 tasks each (308*3 + 25*4 = 1024).
+     * taskGroups needed = 333 / 7 = 47 remainder 4, i.e. 48 taskGroups:
+     * 47 of them run 7 channels each and one runs the remaining 4 channels.
+     *
+     * Processing: the taskGroup holding the leftover 4 channels is handled first. It takes the
+     * tasks of 4 channels (3 tasks per channel on average) and is given taskGroupId 0;
+     * the remaining tasks are then dealt round-robin, like dealing cards, to the taskGroups
+     * holding the average channel count.
+     *
+     * TODO delete it
+     *
+     * @param averTaskPerChannel average number of tasks per channel
+     * @param channelNumber total number of channels
+     * @param channelsPerTaskGroup number of channels per taskGroup
+     * @return the complete, independent configuration of each taskGroup
+ */
+ @SuppressWarnings("serial")
+    private List<Configuration> distributeTasksToTaskGroup(
+            int averTaskPerChannel, int channelNumber,
+            int channelsPerTaskGroup) {
+        Validate.isTrue(averTaskPerChannel > 0 && channelNumber > 0
+                        && channelsPerTaskGroup > 0,
+                "averTaskPerChannel, channelNumber and channelsPerTaskGroup must all be positive.");
+        List<Configuration> taskConfigs = this.configuration
+ .getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
+ int taskGroupNumber = channelNumber / channelsPerTaskGroup;
+ int leftChannelNumber = channelNumber % channelsPerTaskGroup;
+ if (leftChannelNumber > 0) {
+ taskGroupNumber += 1;
+ }
+
+ /**
+         * If there is only one taskGroup, tag it and return directly
+ */
+ if (taskGroupNumber == 1) {
+ final Configuration taskGroupConfig = this.configuration.clone();
+ /**
+             * Configuration.clone() does not clone the content list, so set it explicitly
+ */
+ taskGroupConfig.set(CoreConstant.DATAX_JOB_CONTENT, this.configuration
+ .getListConfiguration(CoreConstant.DATAX_JOB_CONTENT));
+ taskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL,
+ channelNumber);
+ taskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID, 0);
+            return new ArrayList<Configuration>() {
+ {
+ add(taskGroupConfig);
+ }
+ };
+ }
+
+        List<Configuration> taskGroupConfigs = new ArrayList<Configuration>();
+        /**
+         * Clear the content configuration in each taskGroup
+ */
+ for (int i = 0; i < taskGroupNumber; i++) {
+ Configuration taskGroupConfig = this.configuration.clone();
+            List<Configuration> taskGroupJobContent = taskGroupConfig
+ .getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
+ taskGroupJobContent.clear();
+ taskGroupConfig.set(CoreConstant.DATAX_JOB_CONTENT, taskGroupJobContent);
+
+ taskGroupConfigs.add(taskGroupConfig);
+ }
+
+ int taskConfigIndex = 0;
+ int channelIndex = 0;
+ int taskGroupConfigIndex = 0;
+
+ /**
+         * First handle the taskGroup whose channel count is not the average
+ */
+ if (leftChannelNumber > 0) {
+ Configuration taskGroupConfig = taskGroupConfigs.get(taskGroupConfigIndex);
+ for (; channelIndex < leftChannelNumber; channelIndex++) {
+ for (int i = 0; i < averTaskPerChannel; i++) {
+                    List<Configuration> taskGroupJobContent = taskGroupConfig
+ .getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
+ taskGroupJobContent.add(taskConfigs.get(taskConfigIndex++));
+ taskGroupConfig.set(CoreConstant.DATAX_JOB_CONTENT,
+ taskGroupJobContent);
+ }
+ }
+
+ taskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL,
+ leftChannelNumber);
+ taskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID,
+ taskGroupConfigIndex++);
+ }
+
+ /**
+         * Round-robin the remaining tasks, then tag each group with its channel count and taskGroupId
+ */
+ int equalDivisionStartIndex = taskGroupConfigIndex;
+ for (; taskConfigIndex < taskConfigs.size()
+ && equalDivisionStartIndex < taskGroupConfigs.size(); ) {
+ for (taskGroupConfigIndex = equalDivisionStartIndex; taskGroupConfigIndex < taskGroupConfigs
+ .size() && taskConfigIndex < taskConfigs.size(); taskGroupConfigIndex++) {
+ Configuration taskGroupConfig = taskGroupConfigs.get(taskGroupConfigIndex);
+                List<Configuration> taskGroupJobContent = taskGroupConfig
+ .getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
+ taskGroupJobContent.add(taskConfigs.get(taskConfigIndex++));
+ taskGroupConfig.set(
+ CoreConstant.DATAX_JOB_CONTENT, taskGroupJobContent);
+ }
+ }
+
+ for (taskGroupConfigIndex = equalDivisionStartIndex;
+ taskGroupConfigIndex < taskGroupConfigs.size(); ) {
+ Configuration taskGroupConfig = taskGroupConfigs.get(taskGroupConfigIndex);
+ taskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL,
+ channelsPerTaskGroup);
+ taskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID,
+ taskGroupConfigIndex++);
+ }
+
+ return taskGroupConfigs;
+ }
+
+ private void postJobReader() {
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.READER, this.readerPluginName));
+ LOG.info("DataX Reader.Job [{}] do post work.",
+ this.readerPluginName);
+ this.jobReader.post();
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ }
+
+ private void postJobWriter() {
+ classLoaderSwapper.setCurrentThreadClassLoader(LoadUtil.getJarLoader(
+ PluginType.WRITER, this.writerPluginName));
+ LOG.info("DataX Writer.Job [{}] do post work.",
+ this.writerPluginName);
+ this.jobWriter.post();
+ classLoaderSwapper.restoreCurrentThreadClassLoader();
+ }
+
+ /**
+     * Checks whether the final result exceeds the error threshold. A threshold set below 1 is a
+     * percentage threshold; above 1 it is a record-count threshold.
+ */
+ private void checkLimit() {
+ Communication communication = super.getContainerCommunicator().collect();
+ errorLimit.checkRecordLimit(communication);
+ errorLimit.checkPercentageLimit(communication);
+ }
+
+ /**
+     * Invoke external hooks
+ */
+ private void invokeHooks() {
+ Communication comm = super.getContainerCommunicator().collect();
+ HookInvoker invoker = new HookInvoker(CoreConstant.DATAX_HOME + "/hook", configuration, comm.getCounter());
+ invoker.invokeAll();
+ }
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/job/meta/ExecuteMode.java b/core/src/main/java/com/alibaba/datax/core/job/meta/ExecuteMode.java
new file mode 100644
index 0000000000..956f9c4b2d
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/job/meta/ExecuteMode.java
@@ -0,0 +1,22 @@
+package com.alibaba.datax.core.job.meta;
+
+/**
+ * Created by liupeng on 15/12/21.
+ */
+public enum ExecuteMode {
+ STANDALONE("standalone"), ;
+
+ String value;
+
+ private ExecuteMode(String value) {
+ this.value = value;
+ }
+
+ public String value() {
+ return this.value;
+ }
+
+ public String getValue() {
+ return this.value;
+ }
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/job/meta/State.java b/core/src/main/java/com/alibaba/datax/core/job/meta/State.java
new file mode 100644
index 0000000000..2a1dd227e6
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/job/meta/State.java
@@ -0,0 +1,32 @@
+package com.alibaba.datax.core.job.meta;
+
+/**
+ * Created by liupeng on 15/12/21.
+ */
+public enum State {
+ SUBMITTING(10),
+ WAITING(20),
+ RUNNING(30),
+ KILLING(40),
+ KILLED(50),
+ FAILED(60),
+    SUCCEEDED(70);
+
+ int value;
+
+ private State(int value) {
+ this.value = value;
+ }
+
+ public int value() {
+ return this.value;
+ }
+
+ public boolean isFinished() {
+ return this == KILLED || this == FAILED || this == SUCCEEDED;
+ }
+
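+    // Any non-terminal state (SUBMITTING, WAITING, RUNNING, KILLING) counts as running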
+ public boolean isRunning() {
+ return !this.isFinished();
+ }
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/job/scheduler/AbstractScheduler.java b/core/src/main/java/com/alibaba/datax/core/job/scheduler/AbstractScheduler.java
new file mode 100755
index 0000000000..ab2b5aa327
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/job/scheduler/AbstractScheduler.java
@@ -0,0 +1,135 @@
+package com.alibaba.datax.core.job.scheduler;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.statistics.communication.Communication;
+import com.alibaba.datax.core.statistics.communication.CommunicationTool;
+import com.alibaba.datax.core.statistics.container.communicator.AbstractContainerCommunicator;
+import com.alibaba.datax.core.util.ErrorRecordChecker;
+import com.alibaba.datax.core.util.FrameworkErrorCode;
+import com.alibaba.datax.core.util.container.CoreConstant;
+import com.alibaba.datax.dataxservice.face.domain.enums.State;
+import org.apache.commons.lang.Validate;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+public abstract class AbstractScheduler {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(AbstractScheduler.class);
+
+ private ErrorRecordChecker errorLimit;
+
+ private AbstractContainerCommunicator containerCommunicator;
+
+ private Long jobId;
+
+ public Long getJobId() {
+ return jobId;
+ }
+
+ public AbstractScheduler(AbstractContainerCommunicator containerCommunicator) {
+ this.containerCommunicator = containerCommunicator;
+ }
+
+    public void schedule(List<Configuration> configurations) {
+ Validate.notNull(configurations,
+ "scheduler配置不能为空");
+ int jobReportIntervalInMillSec = configurations.get(0).getInt(
+ CoreConstant.DATAX_CORE_CONTAINER_JOB_REPORTINTERVAL, 30000);
+ int jobSleepIntervalInMillSec = configurations.get(0).getInt(
+ CoreConstant.DATAX_CORE_CONTAINER_JOB_SLEEPINTERVAL, 10000);
+
+ this.jobId = configurations.get(0).getLong(
+ CoreConstant.DATAX_CORE_CONTAINER_JOB_ID);
+
+ errorLimit = new ErrorRecordChecker(configurations.get(0));
+
+ /**
+         * Register a Communication for each taskGroupContainer
+ */
+ this.containerCommunicator.registerCommunication(configurations);
+
+ int totalTasks = calculateTaskCount(configurations);
+ startAllTaskGroup(configurations);
+
+ Communication lastJobContainerCommunication = new Communication();
+
+ long lastReportTimeStamp = System.currentTimeMillis();
+ try {
+ while (true) {
+ /**
+ * step 1: collect job stat
+ * step 2: getReport info, then report it
+ * step 3: errorLimit do check
+ * step 4: dealSucceedStat();
+ * step 5: dealKillingStat();
+ * step 6: dealFailedStat();
+ * step 7: refresh last job stat, and then sleep for next while
+ *
+ * above steps, some ones should report info to DS
+ *
+ */
+ Communication nowJobContainerCommunication = this.containerCommunicator.collect();
+ nowJobContainerCommunication.setTimestamp(System.currentTimeMillis());
+ LOG.debug(nowJobContainerCommunication.toString());
+
+                // Reporting interval
+ long now = System.currentTimeMillis();
+ if (now - lastReportTimeStamp > jobReportIntervalInMillSec) {
+ Communication reportCommunication = CommunicationTool
+ .getReportCommunication(nowJobContainerCommunication, lastJobContainerCommunication, totalTasks);
+
+ this.containerCommunicator.report(reportCommunication);
+ lastReportTimeStamp = now;
+ lastJobContainerCommunication = nowJobContainerCommunication;
+ }
+
+ errorLimit.checkRecordLimit(nowJobContainerCommunication);
+
+ if (nowJobContainerCommunication.getState() == State.SUCCEEDED) {
+ LOG.info("Scheduler accomplished all tasks.");
+ break;
+ }
+
+ if (isJobKilling(this.getJobId())) {
+ dealKillingStat(this.containerCommunicator, totalTasks);
+ } else if (nowJobContainerCommunication.getState() == State.FAILED) {
+ dealFailedStat(this.containerCommunicator, nowJobContainerCommunication.getThrowable());
+ }
+
+ Thread.sleep(jobSleepIntervalInMillSec);
+ }
+ } catch (InterruptedException e) {
+            // Exit with the failed state
+            LOG.error("InterruptedException caught!", e);
+
+ throw DataXException.asDataXException(
+ FrameworkErrorCode.RUNTIME_ERROR, e);
+ }
+
+ }
+
+    protected abstract void startAllTaskGroup(List<Configuration> configurations);
+
+ protected abstract void dealFailedStat(AbstractContainerCommunicator frameworkCollector, Throwable throwable);
+
+ protected abstract void dealKillingStat(AbstractContainerCommunicator frameworkCollector, int totalTasks);
+
+    private int calculateTaskCount(List<Configuration> configurations) {
+ int totalTasks = 0;
+ for (Configuration taskGroupConfiguration : configurations) {
+ totalTasks += taskGroupConfiguration.getListConfiguration(
+ CoreConstant.DATAX_JOB_CONTENT).size();
+ }
+ return totalTasks;
+ }
+
+// private boolean isJobKilling(Long jobId) {
+// Result