From b6ab39f8e2362c93cbad319916632cfb8ee5cb02 Mon Sep 17 00:00:00 2001
From: Anqi <16240361+Nicole00@users.noreply.github.com>
Date: Sat, 10 Oct 2020 17:06:09 +0800
Subject: [PATCH] Support nebula-spark reader v1.0 (#155)

* Support nebula-spark reader v1.0
---
 .gitignore                                    |   1 +
 client/pom.xml                                |   2 +-
 tools/nebula-spark/pom.xml                    |  80 ++++++++++++-
 .../com/vesoft/nebula/bean/ConnectInfo.java   |  50 ---------
 .../vesoft/nebula/bean/DataSourceConfig.java  |  92 +++++++++++++++
 .../com/vesoft/nebula/bean/Parameters.java    |  17 +++
 .../java/com/vesoft/nebula/bean/ScanInfo.java |  74 ------------
 .../com/vesoft/nebula/common/Checkable.java   |  13 ---
 .../java/com/vesoft/nebula/common/Type.java   |   2 +-
 .../nebula/example/NebulaReaderExample.java   |  30 +++--
 .../nebula/reader/AbstractNebulaIterator.java |  48 +++-----
 .../vesoft/nebula/reader/DefaultSource.java   |  77 +++++++++++++
 .../nebula/reader/NebulaDataSource.java       |  47 --------
 .../vesoft/nebula/reader/NebulaPartition.java |   9 +-
 .../com/vesoft/nebula/reader/NebulaRDD.java   |  39 ++++---
 .../vesoft/nebula/reader/NebulaRelation.java  | 106 +++++++++++-------
 .../nebula/reader/ScanEdgeIterator.java       |  64 ++++++-----
 .../nebula/reader/ScanVertexIterator.java     |  62 +++++-----
 .../vesoft/nebula/util/DataTypeConverter.java |  18 +++
 ...pache.spark.sql.sources.DataSourceRegister |   2 +
 20 files changed, 471 insertions(+), 362 deletions(-)
 delete mode 100644 tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/ConnectInfo.java
 create mode 100644 tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/DataSourceConfig.java
 create mode 100644 tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/Parameters.java
 delete mode 100644 tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/ScanInfo.java
 delete mode 100644 tools/nebula-spark/src/main/java/com/vesoft/nebula/common/Checkable.java
 create mode 100644 tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/DefaultSource.java
 delete mode 100644 tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaDataSource.java
 create mode 100644 tools/nebula-spark/src/main/java/com/vesoft/nebula/util/DataTypeConverter.java
 create mode 100644 tools/nebula-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister

diff --git a/.gitignore b/.gitignore
index 06163b7b5..84e7a6bca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,4 @@ target/
 
 spark-importer.ipr
 spark-importer.iws
+.DS_Store
diff --git a/client/pom.xml b/client/pom.xml
index b26194fad..0dbb9d696 100644
--- a/client/pom.xml
+++ b/client/pom.xml
@@ -174,7 +174,7 @@
             <plugin>
                 <groupId>org.codehaus.mojo</groupId>
                 <artifactId>findbugs-maven-plugin</artifactId>
-                <version>3.0.6-SNAPSHOT</version>
+                <version>3.0.4</version>
                 <configuration>
                     <effort>High</effort>
                     <threshold>Default</threshold>
diff --git a/tools/nebula-spark/pom.xml b/tools/nebula-spark/pom.xml
index 9aa7beb3f..ff6238fc3 100644
--- a/tools/nebula-spark/pom.xml
+++ b/tools/nebula-spark/pom.xml
@@ -21,17 +21,17 @@
         <dependency>
             <groupId>org.apache.spark</groupId>
-            <artifactId>spark-core_2.12</artifactId>
+            <artifactId>spark-core_2.11</artifactId>
             <version>${spark.version}</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
-            <artifactId>spark-graphx_2.12</artifactId>
+            <artifactId>spark-sql_2.11</artifactId>
             <version>${spark.version}</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
-            <artifactId>spark-sql_2.12</artifactId>
+            <artifactId>spark-graphx_2.11</artifactId>
             <version>${spark.version}</version>
         </dependency>
@@ -43,6 +43,21 @@
 
     <build>
         <plugins>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <version>3.2.0</version>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-compiler-plugin</artifactId>
@@ -51,6 +66,65 @@
                     <target>${compiler.target.version}</target>
                 </configuration>
             </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.1</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <createDependencyReducedPom>false</createDependencyReducedPom>
+                            <artifactSet>
+                                <excludes>
+                                    <exclude>org.apache.spark:*</exclude>
+                                    <exclude>org.apache.hadoop:*</exclude>
+                                    <exclude>org.apache.hive:*</exclude>
+                                    <exclude>log4j:log4j</exclude>
+                                    <exclude>org.apache.orc:*</exclude>
+                                    <exclude>xml-apis:xml-apis</exclude>
+                                    <exclude>javax.inject:javax.inject</exclude>
+                                    <exclude>org.spark-project.hive:hive-exec</exclude>
+                                    <exclude>stax:stax-api</exclude>
+                                    <exclude>org.glassfish.hk2.external:aopalliance-repackaged</exclude>
+                                </excludes>
+                            </artifactSet>
+                            <filters>
+                                <filter>
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>com/vesoft/tools/**</exclude>
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <groupId>net.alchim31.maven</groupId>
+                <artifactId>scala-maven-plugin</artifactId>
+                <version>3.2.2</version>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>compile</goal>
+                            <goal>testCompile</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
         </plugins>
     </build>
 </project>
\ No newline at end of file
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/ConnectInfo.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/ConnectInfo.java
deleted file mode 100644
index 43cea8cde..000000000
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/ConnectInfo.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/* Copyright (c) 2020 vesoft inc. All rights reserved.
- *
- * This source code is licensed under Apache 2.0 License,
- * attached with Common Clause Condition 1.0, found in the LICENSES directory.
- */
-
-package com.vesoft.nebula.bean;
-
-import com.google.common.base.Preconditions;
-import com.google.common.net.HostAndPort;
-import com.vesoft.nebula.common.Checkable;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.commons.lang.StringUtils;
-
-public class ConnectInfo implements Checkable, Serializable {
-
-    private String spaceName;
-
-    private String hostAndPorts;
-
-    public ConnectInfo(String spaceName, String hostAndPorts) {
-        this.spaceName = spaceName;
-        this.hostAndPorts = hostAndPorts;
-        check();
-    }
-
-    public String getSpaceName() {
-        return this.spaceName;
-    }
-
-    public List<HostAndPort> getHostAndPorts() {
-        List<HostAndPort> hostAndPortList = new ArrayList<>();
-        String[] hostAndPortArray = hostAndPorts.split(",");
-        for (String hostAndPort : hostAndPortArray) {
-            hostAndPortList.add(HostAndPort.fromString(hostAndPort));
-        }
-        return hostAndPortList;
-    }
-
-    @Override
-    public void check() throws IllegalArgumentException {
-        Preconditions.checkArgument(StringUtils.isNotEmpty(spaceName),
-                "The spaceName can't be null or empty");
-
-        Preconditions.checkArgument(StringUtils.isNotEmpty(hostAndPorts),
-                "The hostAndPorts can't be null or empty");
-    }
-}
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/DataSourceConfig.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/DataSourceConfig.java
new file mode 100644
index 000000000..218a53964
--- /dev/null
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/DataSourceConfig.java
@@ -0,0 +1,92 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+package com.vesoft.nebula.bean;
+
+import com.google.common.net.HostAndPort;
+import org.apache.commons.lang.StringUtils;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.ArrayList;
+
+public class DataSourceConfig implements Serializable {
+
+    private final String nameSpace;
+
+    private final String type;
+
+    private final String label;
+
+    private final String returnColString;
+
+    private boolean allCols = false;
+
+    private final int partitionNumber;
+
+    private final String hostAndPorts;
+
+    /**
+     * @param nameSpace       nameSpace
+     * @param type            scan element type
+     * @param label           vertex or edge label
+     * @param partitionNumber partition number
+     * @param returnColString scan col string example: name,age
+     * @param hostAndPorts    host and port
+     */
+    public DataSourceConfig(String nameSpace, String type, String label, String returnColString, int partitionNumber, String hostAndPorts) {
+        this.nameSpace = nameSpace;
+        this.type = type;
+        this.label = label;
+        this.returnColString = returnColString;
+        this.partitionNumber = partitionNumber;
+        this.hostAndPorts = hostAndPorts;
+    }
+
+    public String getNameSpace() {
+        return nameSpace;
+    }
+
+    public String getType() {
+        return type;
+    }
+
+    public String getLabel() {
+        return label;
+    }
+
+    public int getPartitionNumber() {
+        return partitionNumber;
+    }
+
+    public boolean getAllCols() {
+        return allCols;
+    }
+
+    public Map<String, List<String>> getReturnColMap() {
+        Map<String, List<String>> result = new HashMap<>(1);
+        if (StringUtils.isBlank(returnColString)) {
+            allCols = true;
+            result.put(label, new ArrayList<>());
+        } else {
+            List<String> properties = Arrays.asList(returnColString.split(","));
+            result.put(label, properties);
+        }
+        return result;
+    }
+
+    public List<HostAndPort> getHostAndPorts() {
+        List<HostAndPort> hostAndPortList = new ArrayList<>();
+        String[] hostAndPortArray = hostAndPorts.split(",");
+        for (String hostAndPort : hostAndPortArray) {
+            hostAndPortList.add(HostAndPort.fromString(hostAndPort));
+        }
+        return hostAndPortList;
+    }
+}
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/Parameters.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/Parameters.java
new file mode 100644
index 000000000..8cfeb953e
--- /dev/null
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/Parameters.java
@@ -0,0 +1,17 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+package com.vesoft.nebula.bean;
+
+public class Parameters {
+
+    public static final String SPACE_NAME = "spaceName";
+    public static final String LABEL = "label";
+    public static final String TYPE = "type";
+    public static final String HOST_AND_PORTS = "hostAndPorts";
+    public static final String RETURN_COLS = "returnCols";
+    public static final String PARTITION_NUMBER = "partitionNumber";
+}
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/ScanInfo.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/ScanInfo.java
deleted file mode 100644
index 956cc765f..000000000
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/bean/ScanInfo.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Copyright (c) 2020 vesoft inc. All rights reserved.
- *
- * This source code is licensed under Apache 2.0 License,
- * attached with Common Clause Condition 1.0, found in the LICENSES directory.
- */
-
-package com.vesoft.nebula.bean;
-
-import com.google.common.base.Preconditions;
-import com.vesoft.nebula.common.Type;
-import com.vesoft.nebula.common.Checkable;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Pattern;
-
-public class ScanInfo implements Checkable, Serializable {
-
-    private String scanType;
-
-    private String returnColString;
-
-    private int partitionNumber;
-
-    private static final String RETURN_COL_REGEX = "(\\w+=(\\w+)(,\\w+)*)(;\\w+=(\\w+)(,\\w+)*)*";
-
-    /**
-     * @param scanType        scan element type
-     * @param returnColString return col string example: labela=name,age;labelb=name
-     */
-    public ScanInfo(String scanType, String returnColString, int partitionNumber) {
-        this.scanType = scanType;
-        this.returnColString = returnColString;
-        this.partitionNumber = partitionNumber;
-    }
-
-    @Override
-    public void check() throws IllegalArgumentException {
-        boolean isLegalType = Type.VERTEX.getType().equalsIgnoreCase(scanType)
-                || Type.EDGE.getType().equalsIgnoreCase(scanType);
-        Preconditions.checkArgument(isLegalType,
-                "scan type '%s' is illegal, it should be '%s' or '%s'",
-                scanType, Type.VERTEX.getType(), Type.EDGE.getType());
-
-        boolean isReturnColLegal = Pattern.matches(RETURN_COL_REGEX, returnColString);
-        Preconditions.checkArgument(isReturnColLegal,
-                "return col string '%s' is illegal, the pattern should like a=b,c;d=e",
-                returnColString);
-    }
-
-    public String getScanType() {
-        return scanType;
-    }
-
-    public int getPartitionNumber() {
-        return partitionNumber;
-    }
-
-    public Map<String, List<String>> getReturnColMap() {
-        check();
-
-        Map<String, List<String>> result = new HashMap<>();
-        String[] returnColSplits = returnColString.split(";");
-        for (String returnColSplit : returnColSplits) {
-            String[] labelPropertyMap = returnColSplit.split("=");
-            String label = labelPropertyMap[0];
-            List<String> properties = Arrays.asList(labelPropertyMap[1].split(","));
-            result.put(label, properties);
-        }
-        return result;
-    }
-}
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/common/Checkable.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/common/Checkable.java
deleted file mode 100644
index bb53a04cf..000000000
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/common/Checkable.java
+++ /dev/null
@@ -1,13 +0,0 @@
-/* Copyright (c) 2020 vesoft inc. All rights reserved.
- *
- * This source code is licensed under Apache 2.0 License,
- * attached with Common Clause Condition 1.0, found in the LICENSES directory.
- */
-
-package com.vesoft.nebula.common;
-
-public interface Checkable {
-
-    void check() throws IllegalArgumentException;
-
-}
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/common/Type.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/common/Type.java
index 7a2cb3e8e..4b891b8a6 100644
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/common/Type.java
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/common/Type.java
@@ -8,7 +8,7 @@
 
 public enum Type {
 
-    VERTEX("VERTEX"),EDGE("EDGE");
+    VERTEX("VERTEX"), EDGE("EDGE");
 
     private String type;
 
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/example/NebulaReaderExample.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/example/NebulaReaderExample.java
index af92f0dd9..524084170 100644
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/example/NebulaReaderExample.java
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/example/NebulaReaderExample.java
@@ -7,6 +7,7 @@
 package com.vesoft.nebula.example;
 
 import com.facebook.thrift.protocol.TCompactProtocol;
+import com.vesoft.nebula.bean.Parameters;
 import com.vesoft.nebula.common.Type;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.Dataset;
@@ -31,30 +32,35 @@ public static void main(String[] args) {
 
         Dataset<Row> vertexDataset = sparkSession
                 .read()
-                .format("com.vesoft.nebula.reader.NebulaDataSource")
-                .option("importType", Type.VERTEX.getType())
-                .option("hostAndPorts", "127.0.0.1:45500")
-                .option("spaceName", "test")
-                .option("returnCols", "course=name;building=name;student=name")
+                .format("nebula")
+                .option(Parameters.TYPE, Type.VERTEX.getType())
+                .option(Parameters.HOST_AND_PORTS, "127.0.0.1:45500")
+                .option(Parameters.PARTITION_NUMBER, "100")
+                .option(Parameters.SPACE_NAME, "nb")
+                .option(Parameters.LABEL, "player")
+                // if configuration "returnCols" is null or "", then return all cols
+                .option(Parameters.RETURN_COLS, "")
                 .load();
         LOGGER.info("vertex course schema: ");
         vertexDataset.printSchema();
         vertexDataset.show();
 
         Dataset<Row> edgeDataset = sparkSession
                 .read()
-                .format("com.vesoft.nebula.reader.NebulaDataSource")
-                .option("importType", Type.EDGE.getType())
-                .option("hostAndPorts", "127.0.0.1:45500")
-                .option("spaceName", "test")
-                .option("returnCols", "like=likeness;select=grade")
+                .format("nebula")
+                .option(Parameters.TYPE, Type.EDGE.getType())
+                .option(Parameters.HOST_AND_PORTS, "127.0.0.1:45500")
+                .option(Parameters.PARTITION_NUMBER, "100")
+                .option(Parameters.SPACE_NAME, "nb")
+                .option(Parameters.LABEL, "serve")
+                // if configuration "returnCols" is null or "", then return all cols
+                .option(Parameters.RETURN_COLS, "")
                 .load();
         LOGGER.info("edge schema: ");
         edgeDataset.printSchema();
         edgeDataset.show();
 
         LOGGER.info("vertex count: {}, edge count: {}",
                 vertexDataset.count(), edgeDataset.count());
-
     }
 }
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/AbstractNebulaIterator.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/AbstractNebulaIterator.java
index 70e621ecb..a95eb2389 100644
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/AbstractNebulaIterator.java
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/AbstractNebulaIterator.java
@@ -7,18 +7,15 @@
 package com.vesoft.nebula.reader;
 
 import com.facebook.thrift.TException;
-import com.vesoft.nebula.bean.ConnectInfo;
-import com.vesoft.nebula.bean.ScanInfo;
+import com.vesoft.nebula.bean.DataSourceConfig;
 import com.vesoft.nebula.exception.GraphConnectException;
 import com.vesoft.nebula.client.meta.MetaClientImpl;
 import com.vesoft.nebula.client.storage.StorageClientImpl;
 import com.vesoft.nebula.client.storage.processor.Processor;
 import com.vesoft.nebula.data.Result;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
+
+import java.util.*;
+
 import org.apache.spark.Partition;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;
@@ -30,34 +27,21 @@ public abstract class AbstractNebulaIterator extends AbstractIterator<Row> {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(AbstractNebulaIterator.class);
 
-    protected Iterator<String[]> dataIterator;
+    protected Iterator<String> dataIterator;
     protected Iterator<Integer> scanPartIterator;
+    protected Map<String, List<Object>> resultValues = new HashMap<>();
 
     protected StorageClientImpl storageClient;
     protected MetaClientImpl metaClient;
    protected Processor processor;
-    protected ConnectInfo connectInfo;
 
     protected Map<String, List<String>> returnCols;
-    protected Map<String, List<Integer>> labelPropIndexMap;
-    protected int propSize;
 
-    public AbstractNebulaIterator(ConnectInfo connectInfo, Partition split,
-                                  ScanInfo scanInfo, Map<String, Integer> propIndexMap) {
-        this.propSize = propIndexMap.size();
-        this.connectInfo = connectInfo;
-        this.returnCols = scanInfo.getReturnColMap();
-        this.labelPropIndexMap = new HashMap<>();
-        for (Map.Entry<String, List<String>> colEntry : returnCols.entrySet()) {
-            List<Integer> colIndexs = new ArrayList<>();
-            List<String> propNames = colEntry.getValue();
-            for (String propName : propNames) {
-                colIndexs.add(propIndexMap.get(propName));
-            }
-            labelPropIndexMap.put(colEntry.getKey(), colIndexs);
-        }
-        LOGGER.info("labelPropIndexMap: {}", labelPropIndexMap);
-        metaClient = new MetaClientImpl(connectInfo.getHostAndPorts());
+    public AbstractNebulaIterator(Partition split,
+                                  DataSourceConfig dataSourceConfig) {
+        this.returnCols = dataSourceConfig.getReturnColMap();
+
+        metaClient = new MetaClientImpl(dataSourceConfig.getHostAndPorts());
         try {
             metaClient.connect();
         } catch (TException e) {
@@ -66,11 +50,11 @@
         storageClient = new StorageClientImpl(metaClient);
 
         // allocate scanPart to this partition
-        int totalPart = metaClient.getPartsAlloc(connectInfo.getSpaceName()).size();
+        int totalPart = metaClient.getPartsAlloc(dataSourceConfig.getNameSpace()).size();
         NebulaPartition nebulaPartition = (NebulaPartition) split;
         List<Integer> scanParts = nebulaPartition.getScanParts(totalPart,
-                scanInfo.getPartitionNumber());
-        LOGGER.info("partition index: {}, scanPart: {}", split.index(), scanParts);
+                dataSourceConfig.getPartitionNumber());
+        LOGGER.info("partition index: {}, scanPart: {}", split.index(), scanParts.toString());
         scanPartIterator = scanParts.iterator();
     }
 
@@ -79,8 +63,8 @@
 
     @Override
     public Row next() {
-        return RowFactory.create(dataIterator.next());
+        return RowFactory.create(resultValues.get(dataIterator.next()).toArray());
     }
 
-    protected abstract Iterator<String[]> process(Result result);
+    protected abstract Iterator<String> process(Result result);
 }
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/DefaultSource.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/DefaultSource.java
new file mode 100644
index 000000000..e8a50a800
--- /dev/null
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/DefaultSource.java
@@ -0,0 +1,77 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+package com.vesoft.nebula.reader;
+
+import com.google.common.base.Preconditions;
+import com.vesoft.nebula.bean.Parameters;
+import com.vesoft.nebula.bean.DataSourceConfig;
+import com.vesoft.nebula.common.Type;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.sources.BaseRelation;
+import org.apache.spark.sql.sources.DataSourceRegister;
+import org.apache.spark.sql.sources.RelationProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.collection.immutable.Map;
+
+import java.util.regex.Pattern;
+
+public class DefaultSource implements RelationProvider, DataSourceRegister {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(DefaultSource.class);
+    private static final String RETURN_COL_REGEX = "(\\w+)(,\\w+)*";
+
+    private DataSourceConfig dataSourceConfig;
+
+    @Override
+    public String shortName() {
+        return "nebula";
+    }
+
+    @Override
+    public BaseRelation createRelation(SQLContext sqlContext, Map<String, String> parameters) {
+        // check and parse parameter spaceName
+        Preconditions.checkArgument(parameters.get(Parameters.SPACE_NAME).isDefined(), "spaceName is not configured.");
+        String spaceName = parameters.get(Parameters.SPACE_NAME).get();
+
+        // check and parse parameter type
+        Preconditions.checkArgument(parameters.get(Parameters.TYPE).isDefined(), "type is not configured.");
+
+        String type = parameters.get(Parameters.TYPE).get();
+        Preconditions.checkArgument(type.equalsIgnoreCase(Type.EDGE.getType())
+                        || type.equalsIgnoreCase(Type.VERTEX.getType()),
+                "type '%s' is illegal, it should be '%s' or '%s'",
+                type, Type.VERTEX.getType(), Type.EDGE.getType());
+
+        // check and parse parameter label
+        Preconditions.checkArgument(parameters.get(Parameters.LABEL).isDefined(), "label is not configured.");
+        String label = parameters.get(Parameters.LABEL).get();
+
+        // check and parse parameter hostAndPorts
+        Preconditions.checkArgument(parameters.get(Parameters.HOST_AND_PORTS).isDefined(), "hostAndPorts is not configured.");
+        String hostAndPorts = parameters.get(Parameters.HOST_AND_PORTS).get();
+
+        // check and parse parameter returnCols
+        Preconditions.checkArgument(parameters.get(Parameters.RETURN_COLS).isDefined(), "returnCols is not configured.");
+        String returnCols = parameters.get(Parameters.RETURN_COLS).get();
+        boolean isReturnColLegal = StringUtils.isBlank(returnCols) || Pattern.matches(RETURN_COL_REGEX, returnCols);
+        Preconditions.checkArgument(isReturnColLegal,
+                "returnCols '%s' is illegal, the pattern should be blank or string like a,b",
+                returnCols);
+
+        // check and parse parameter partitionNumber
+        Preconditions.checkArgument(parameters.get(Parameters.PARTITION_NUMBER).isDefined(), "partition is not configured.");
+        String partitionNumber = parameters.get(Parameters.PARTITION_NUMBER).get();
+
+        dataSourceConfig = new DataSourceConfig(spaceName, type,
+                label, returnCols, Integer.parseInt(partitionNumber), hostAndPorts);
+        LOGGER.info("dataSourceConfig: {}", dataSourceConfig);
+
+        return new NebulaRelation(sqlContext, dataSourceConfig);
+    }
+}
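
Note on the returnCols contract enforced above: the label now comes from its own option, so returnCols is a plain comma-separated property list rather than ScanInfo's old "label=prop,..." encoding. A minimal sketch of what RETURN_COL_REGEX accepts (class name and sample strings are illustrative only):

    // Sketch: exercising DefaultSource's RETURN_COL_REGEX = "(\\w+)(,\\w+)*"
    public class ReturnColsSketch {
        public static void main(String[] args) {
            String regex = "(\\w+)(,\\w+)*";
            System.out.println(java.util.regex.Pattern.matches(regex, "name,age"));        // true
            System.out.println(java.util.regex.Pattern.matches(regex, "course=name,age")); // false: '=' is not \w, the old ScanInfo format is gone
            // A blank string also fails the regex, but createRelation allows it
            // explicitly via StringUtils.isBlank, meaning "return all columns".
        }
    }
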
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaDataSource.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaDataSource.java
deleted file mode 100644
index 1e766b5e9..000000000
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaDataSource.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/* Copyright (c) 2020 vesoft inc. All rights reserved.
- *
- * This source code is licensed under Apache 2.0 License,
- * attached with Common Clause Condition 1.0, found in the LICENSES directory.
- */
-
-package com.vesoft.nebula.reader;
-
-import com.vesoft.nebula.bean.ConnectInfo;
-import com.vesoft.nebula.bean.ScanInfo;
-import org.apache.spark.sql.SQLContext;
-import org.apache.spark.sql.sources.BaseRelation;
-import org.apache.spark.sql.sources.RelationProvider;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import scala.collection.immutable.Map;
-import scala.runtime.AbstractFunction0;
-
-public class NebulaDataSource implements RelationProvider {
-
-    private static final Logger LOGGER = LoggerFactory.getLogger(NebulaDataSource.class);
-
-    private ConnectInfo connectInfo;
-    private ScanInfo scanInfo;
-
-    @Override
-    public BaseRelation createRelation(SQLContext sqlContext, Map<String, String> parameters) {
-        // parse and check parameters
-        String spaceName = parameters.get("spaceName").get();
-        String hostAndPorts = parameters.get("hostAndPorts").get();
-        connectInfo = new ConnectInfo(spaceName, hostAndPorts);
-        LOGGER.info("connectInfo, {}", connectInfo);
-
-        String partitionNumber = parameters.getOrElse("partitionNumber",
-                new AbstractFunction0<String>() {
-                    @Override
-                    public String apply() {
-                        return String.valueOf(Runtime.getRuntime().availableProcessors());
-                    }
-                });
-        scanInfo = new ScanInfo(parameters.get("importType").get(),
-                parameters.get("returnCols").get(), Integer.valueOf(partitionNumber));
-        LOGGER.info("scanInfo: {}", scanInfo);
-
-        return new NebulaRelation(sqlContext, connectInfo, scanInfo);
-    }
-}
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaPartition.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaPartition.java
index bd4eb1b67..235caa5c6 100644
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaPartition.java
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaPartition.java
@@ -8,6 +8,7 @@
 
 import java.util.ArrayList;
 import java.util.List;
+
 import org.apache.spark.Partition;
 
 public class NebulaPartition implements Partition {
@@ -25,19 +26,17 @@ public int index() {
 
     /**
      * allocate scanPart to partition
-     * @param totalPart nebula data part num
-     * @return scan data part list
+     *
+     * @param totalPart     nebula data part num
+     * @return scan data part list
      */
     public List<Integer> getScanParts(int totalPart, int totalPartition) {
         List<Integer> scanParts = new ArrayList<>();
-
         int currentPart = index + 1;
         while (currentPart <= totalPart) {
             scanParts.add(currentPart);
            currentPart += totalPartition;
         }
-
         return scanParts;
-
     }
 }
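
Note on getScanParts above: each Spark partition takes Nebula storage parts round-robin, starting at index + 1 and stepping by the configured partitionNumber. A minimal sketch of the same arithmetic, assuming a space with 10 storage parts scanned with partitionNumber = 3 (both numbers are illustrative):

    // Sketch: the round-robin assignment performed by NebulaPartition.getScanParts.
    public class ScanPartsSketch {
        public static void main(String[] args) {
            int totalPart = 10;      // parts in the Nebula space
            int totalPartition = 3;  // Spark partitions requested
            for (int index = 0; index < totalPartition; index++) {
                StringBuilder parts = new StringBuilder();
                for (int part = index + 1; part <= totalPart; part += totalPartition) {
                    parts.append(part).append(' ');
                }
                // prints: 0 -> 1 4 7 10, 1 -> 2 5 8, 2 -> 3 6 9
                System.out.println(index + " -> " + parts.toString().trim());
            }
        }
    }

Every storage part is scanned exactly once, and the imbalance between partitions is at most one part.
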
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaRDD.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaRDD.java
index 80bc21011..bb6654624 100644
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaRDD.java
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaRDD.java
@@ -6,10 +6,9 @@
 
 package com.vesoft.nebula.reader;
 
-import com.vesoft.nebula.bean.ConnectInfo;
-import com.vesoft.nebula.bean.ScanInfo;
+import com.vesoft.nebula.bean.DataSourceConfig;
 import com.vesoft.nebula.common.Type;
-import java.util.Map;
+
 import org.apache.spark.Partition;
 import org.apache.spark.TaskContext;
 import org.apache.spark.rdd.RDD;
@@ -24,38 +23,38 @@ public class NebulaRDD extends RDD<Row> {
 
     private static final ClassTag<Row> ROW_TAG = ClassManifestFactory$.MODULE$.fromClass(Row.class);
 
-    private ConnectInfo connectInfo;
-    private ScanInfo scanInfo;
-    private Map<String, Integer> propIndexMap;
+    private DataSourceConfig dataSourceConfig;
 
     /**
-     * @param sqlContext sqlContext
-     * @param scanInfo scan info
-     * @param connectInfo nebula connect info
-     * @param propIndexMap label and its properties in schema index map
+     * @param sqlContext       sqlContext
+     * @param dataSourceConfig scan info
      */
-    public NebulaRDD(SQLContext sqlContext, ScanInfo scanInfo,
-                     ConnectInfo connectInfo, Map<String, Integer> propIndexMap) {
+    public NebulaRDD(SQLContext sqlContext, DataSourceConfig dataSourceConfig) {
         super(sqlContext.sparkContext(), new ArrayBuffer<>(), ROW_TAG);
-        this.propIndexMap = propIndexMap;
-        this.scanInfo = scanInfo;
-        this.connectInfo = connectInfo;
+        this.dataSourceConfig = dataSourceConfig;
     }
 
+    /**
+     * start to scan vertex or edge data
+     *
+     * @param split
+     * @param context
+     * @return Iterator
+     */
     @Override
     public Iterator<Row> compute(Partition split, TaskContext context) {
-        String scanType = scanInfo.getScanType();
-        if (Type.VERTEX.getType().equalsIgnoreCase(scanType)) {
-            return new ScanVertexIterator(connectInfo, split, scanInfo, propIndexMap);
+        String type = dataSourceConfig.getType();
+        if (Type.VERTEX.getType().equalsIgnoreCase(type)) {
+            return new ScanVertexIterator(split, dataSourceConfig);
         } else {
-            return new ScanEdgeIterator(connectInfo, split, scanInfo, propIndexMap);
+            return new ScanEdgeIterator(split, dataSourceConfig);
         }
     }
 
     @Override
     public Partition[] getPartitions() {
-        int partitionNumber = scanInfo.getPartitionNumber();
+        int partitionNumber = dataSourceConfig.getPartitionNumber();
         Partition[] partitions = new Partition[partitionNumber];
         for (int i = 0; i < partitionNumber; i++) {
             Partition partition = new NebulaPartition(i);
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaRelation.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaRelation.java
index 71edce695..7ff95c5da 100644
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaRelation.java
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/NebulaRelation.java
@@ -6,16 +6,19 @@
 
 package com.vesoft.nebula.reader;
 
-import com.vesoft.nebula.bean.ConnectInfo;
-import com.vesoft.nebula.bean.ScanInfo;
+import com.facebook.thrift.TException;
+import com.vesoft.nebula.bean.DataSourceConfig;
+import com.vesoft.nebula.client.meta.MetaClientImpl;
 import com.vesoft.nebula.common.Type;
+
 import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
+import java.util.HashMap;
+import java.util.ArrayList;
+
+import com.vesoft.nebula.exception.GraphConnectException;
+import com.vesoft.nebula.util.DataTypeConverter;
 import org.apache.spark.rdd.RDD;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SQLContext;
@@ -33,64 +36,83 @@ public class NebulaRelation extends BaseRelation implements Serializable, TableS
 
     private SQLContext sqlContext;
     private StructType schema;
+    private final Map<String, List<StructField>> labelFields = new HashMap<>();
 
-    private ConnectInfo connectInfo;
-    private ScanInfo scanInfo;
-    private Map<String, Integer> propIndexMap;
+    private DataSourceConfig dataSourceConfig;
 
-    public NebulaRelation(SQLContext sqlContext, ConnectInfo connectInfo, ScanInfo scanInfo) {
+    public NebulaRelation(SQLContext sqlContext, DataSourceConfig dataSourceConfig) {
         this.sqlContext = sqlContext;
-        this.connectInfo = connectInfo;
-        this.scanInfo = scanInfo;
+        this.dataSourceConfig = dataSourceConfig;
 
-        initParameters(scanInfo);
+        initSchema(dataSourceConfig);
     }
 
-    private void initParameters(ScanInfo scanInfo) {
-
-        Set<String> returnPropSet = new HashSet<>();
-        Map<String, List<String>> returnColMap = scanInfo.getReturnColMap();
+    /**
+     * init result dataset's schema
+     *
+     * @param dataSourceConfig
+     */
+    private void initSchema(DataSourceConfig dataSourceConfig) {
+        Map<String, List<String>> returnColMap = dataSourceConfig.getReturnColMap();
         LOGGER.info("return col map: {}", returnColMap);
-        for (Map.Entry<String, List<String>> returnColEntry : returnColMap.entrySet()) {
-            List<String> returnCols = returnColEntry.getValue();
-            for (String returnCol : returnCols) {
-                returnPropSet.add(returnCol);
-            }
-        }
-        LOGGER.info("return prop set: {}", returnPropSet);
 
         List<StructField> fields = new ArrayList<>();
-        if (Type.VERTEX.getType().equalsIgnoreCase(scanInfo.getScanType())) {
-            fields.add(DataTypes.createStructField("id", DataTypes.StringType, false));
-        } else {
-            fields.add(DataTypes.createStructField("src", DataTypes.StringType, false));
-            fields.add(DataTypes.createStructField("dst", DataTypes.StringType, false));
-        }
-        /*
-        spark read result has only one dataset, but scan result contain difference properties,
-        so create a schema contain all properties
-         */
-        for (String returnCol : returnPropSet) {
-            fields.add(DataTypes.createStructField(returnCol, DataTypes.StringType, true));
+
+        MetaClientImpl metaClient = new MetaClientImpl(dataSourceConfig.getHostAndPorts());
+        try {
+            metaClient.connect();
+        } catch (TException e) {
+            throw new GraphConnectException(e.getMessage(), e);
         }
-        schema = DataTypes.createStructType(fields);
 
-        propIndexMap = new HashMap<>();
-        for (int i = 0; i < fields.size(); i++) {
-            propIndexMap.put(fields.get(i).name(), i);
+        Map<String, Class> schemaColAndType;
+        for (Map.Entry<String, List<String>> returnColEntry : returnColMap.entrySet()) {
+            if (Type.VERTEX.getType().equalsIgnoreCase(dataSourceConfig.getType())) {
+                fields.add(DataTypes.createStructField("_vertexId", DataTypes.StringType, false));
+                schemaColAndType = metaClient.getTagSchema(dataSourceConfig.getNameSpace(), dataSourceConfig.getLabel());
+            } else {
+                fields.add(DataTypes.createStructField("_srcId", DataTypes.StringType, false));
+                fields.add(DataTypes.createStructField("_dstId", DataTypes.StringType, false));
+                schemaColAndType = metaClient.getEdgeSchema(dataSourceConfig.getNameSpace(), dataSourceConfig.getLabel());
+            }
+
+            if (dataSourceConfig.getAllCols()) {
+                // if allCols is true, then fields should contain all properties.
+                for (Map.Entry<String, Class> colTypeEntry : schemaColAndType.entrySet()) {
+                    fields.add(DataTypes.createStructField(colTypeEntry.getKey(),
+                            DataTypeConverter.convertDataType(colTypeEntry.getValue()),
+                            true));
+                }
+            } else {
+                for (String returnCol : returnColEntry.getValue()) {
+                    if (schemaColAndType.containsKey(returnCol)) {
+                        fields.add(DataTypes.createStructField(returnCol,
+                                DataTypeConverter.convertDataType(schemaColAndType.get(returnCol)),
+                                true));
+                    } else {
+                        LOGGER.warn("label {} doesn't contain col {}", dataSourceConfig.getLabel(), returnCol);
+                    }
+                }
+            }
+
+            labelFields.put(returnColEntry.getKey(), fields);
+            schema = new StructType(fields.toArray(new StructField[fields.size()]));
+            LOGGER.info("return prop set for label {} : {}", returnColEntry.getKey(), fields);
         }
-        LOGGER.info("propIndexMap: {}", propIndexMap);
     }
 
+
     @Override
     public SQLContext sqlContext() {
         return sqlContext;
     }
 
+
     @Override
     public StructType schema() {
         return schema;
     }
 
+
     @Override
     public RDD<Row> buildScan() {
-        return new NebulaRDD(sqlContext, scanInfo, connectInfo, propIndexMap);
+        return new NebulaRDD(sqlContext, dataSourceConfig);
     }
 }
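
Note on initSchema above: the resulting Spark schema always leads with the ID columns (_vertexId for a tag scan; _srcId and _dstId for an edge scan), followed by the requested properties typed through DataTypeConverter instead of the old all-strings schema. Purely as an illustration, assuming the example's "serve" edge carried long-typed start_year and end_year properties (an assumption, not part of this patch), printSchema() on the edge read would report something like:

    root
     |-- _srcId: string (nullable = false)
     |-- _dstId: string (nullable = false)
     |-- start_year: long (nullable = true)
     |-- end_year: long (nullable = true)
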
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/ScanEdgeIterator.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/ScanEdgeIterator.java
index f7dd2c3eb..8ca8b8633 100644
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/ScanEdgeIterator.java
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/ScanEdgeIterator.java
@@ -6,31 +6,35 @@
 
 package com.vesoft.nebula.reader;
 
-import com.vesoft.nebula.bean.ConnectInfo;
-import com.vesoft.nebula.bean.ScanInfo;
+import com.vesoft.nebula.bean.DataSourceConfig;
 import com.vesoft.nebula.client.storage.processor.ScanEdgeProcessor;
 import com.vesoft.nebula.data.Property;
 import com.vesoft.nebula.data.Result;
+import com.vesoft.nebula.data.Row;
 import com.vesoft.nebula.storage.ScanEdgeResponse;
+
 import java.io.IOException;
+import java.util.Map;
+import java.util.List;
 import java.util.ArrayList;
 import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
+
 import org.apache.spark.Partition;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class ScanEdgeIterator extends AbstractNebulaIterator {
 
-    private Logger logger = LoggerFactory.getLogger(ScanEdgeIterator.class);
+    private final Logger LOGGER = LoggerFactory.getLogger(ScanEdgeIterator.class);
 
     private Iterator<ScanEdgeResponse> responseIterator;
 
-    public ScanEdgeIterator(ConnectInfo connectInfo, Partition split,
-                            ScanInfo scanInfo, Map<String, Integer> propIndexMap) {
-        super(connectInfo, split, scanInfo, propIndexMap);
-        processor = new ScanEdgeProcessor(metaClient);
+    private DataSourceConfig dataSourceConfig;
+
+    public ScanEdgeIterator(Partition split,
+                            DataSourceConfig dataSourceConfig) {
+        super(split, dataSourceConfig);
+        this.dataSourceConfig = dataSourceConfig;
     }
 
     @Override
@@ -43,22 +47,22 @@ public boolean hasNext() {
         if (responseIterator == null || !responseIterator.hasNext()) {
             if (scanPartIterator.hasNext()) {
                 try {
-                    responseIterator = storageClient.scanEdge(connectInfo.getSpaceName(),
-                            scanPartIterator.next(), returnCols,
-                            false, 1000, 0L, Long.MAX_VALUE);
+                    responseIterator = storageClient.scanEdge(dataSourceConfig.getNameSpace(),
+                            scanPartIterator.next(), returnCols, dataSourceConfig.getAllCols(),
+                            1000, 0L, Long.MAX_VALUE);
                 } catch (IOException e) {
-                    logger.error(e.getMessage(), e);
+                    LOGGER.error(e.getMessage(), e);
                 }
                 continue;
             }
             break;
-        } else if (responseIterator.hasNext()) {
+        } else {
             ScanEdgeResponse next = responseIterator.next();
             if (next != null) {
-                Result processResult = processor.process(connectInfo.getSpaceName(), next);
+                processor = new ScanEdgeProcessor(metaClient);
+                Result processResult = processor.process(dataSourceConfig.getNameSpace(), next);
                 dataIterator = process(processResult);
             }
-            continue;
         }
     }
     if (dataIterator == null) {
@@ -68,26 +72,24 @@
     }
 
     @Override
-    protected Iterator<String[]> process(Result result) {
-        List<String[]> resultValues = new ArrayList<>();
-
-        Map<String, List<com.vesoft.nebula.data.Row>> dataMap = result.getRows();
-        for (Map.Entry<String, List<com.vesoft.nebula.data.Row>> dataEntry : dataMap.entrySet()) {
+    protected Iterator<String> process(Result result) {
+        Map<String, List<Row>> dataMap = result.getRows();
+        for (Map.Entry<String, List<Row>> dataEntry : dataMap.entrySet()) {
             String labelName = dataEntry.getKey();
-            List<Integer> propIndexs = labelPropIndexMap.get(labelName);
-            List<com.vesoft.nebula.data.Row> rows = dataEntry.getValue();
-            for (com.vesoft.nebula.data.Row row : rows) {
-                Iterator<Integer> nameIndexIterator = propIndexs.iterator();
-                String[] fields = new String[propSize + 2];
-                fields[0] = String.valueOf(row.getDefaultProperties()[0].getValue());
-                fields[1] = String.valueOf(row.getDefaultProperties()[2].getValue());
+            List<Row> rows = dataEntry.getValue();
+            for (Row row : rows) {
+                List<Object> fields = new ArrayList<>();
+                // add default property _srcId and _dstId for edge
+                fields.add(String.valueOf(row.getDefaultProperties()[0].getValue()));
+                fields.add(String.valueOf(row.getDefaultProperties()[2].getValue()));
                 Property[] properties = row.getProperties();
                 for (int i = 0; i < properties.length; i++) {
-                    fields[nameIndexIterator.next()] = String.valueOf(properties[i].getValue());
+                    fields.add(properties[i].getValue());
                 }
-                resultValues.add(fields);
+                resultValues.put(labelName, fields);
             }
         }
-        return resultValues.iterator();
+        LOGGER.info("edge info ={}", resultValues.toString());
+        return resultValues.keySet().iterator();
     }
 }
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/ScanVertexIterator.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/ScanVertexIterator.java
index c36bd203d..80e96304d 100644
--- a/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/ScanVertexIterator.java
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/reader/ScanVertexIterator.java
@@ -6,32 +6,36 @@
 
 package com.vesoft.nebula.reader;
 
-import com.vesoft.nebula.bean.ConnectInfo;
-import com.vesoft.nebula.bean.ScanInfo;
+import com.vesoft.nebula.bean.DataSourceConfig;
+import com.vesoft.nebula.data.Row;
 import com.vesoft.nebula.exception.GraphOperateException;
 import com.vesoft.nebula.client.storage.processor.ScanVertexProcessor;
 import com.vesoft.nebula.data.Property;
 import com.vesoft.nebula.data.Result;
 import com.vesoft.nebula.storage.ScanVertexResponse;
+
 import java.io.IOException;
+import java.util.Map;
+import java.util.List;
 import java.util.ArrayList;
 import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
+
 import org.apache.spark.Partition;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class ScanVertexIterator extends AbstractNebulaIterator {
 
-    private Logger logger = LoggerFactory.getLogger(ScanVertexIterator.class);
+    private final Logger LOGGER = LoggerFactory.getLogger(ScanVertexIterator.class);
 
     private Iterator<ScanVertexResponse> responseIterator;
 
-    public ScanVertexIterator(ConnectInfo connectInfo, Partition split,
-                              ScanInfo scanInfo, Map<String, Integer> propIndexMap) {
-        super(connectInfo, split, scanInfo, propIndexMap);
-        processor = new ScanVertexProcessor(metaClient);
+    private DataSourceConfig dataSourceConfig;
+
+    public ScanVertexIterator(Partition split,
+                              DataSourceConfig dataSourceConfig) {
+        super(split, dataSourceConfig);
+        this.dataSourceConfig = dataSourceConfig;
     }
 
     @Override
@@ -44,26 +48,25 @@ public boolean hasNext() {
         if (responseIterator == null || !responseIterator.hasNext()) {
             if (scanPartIterator.hasNext()) {
                 try {
-                    responseIterator = storageClient.scanVertex(connectInfo.getSpaceName(),
-                            scanPartIterator.next(), returnCols,
-                            false, 1000, 0L, Long.MAX_VALUE);
+                    responseIterator = storageClient.scanVertex(dataSourceConfig.getNameSpace(),
+                            scanPartIterator.next(), returnCols, dataSourceConfig.getAllCols(),
+                            1000, 0L, Long.MAX_VALUE);
                 } catch (IOException e) {
-                    logger.error(e.getMessage(), e);
+                    LOGGER.error(e.getMessage(), e);
                     throw new GraphOperateException(e.getMessage(), e);
                 }
                 continue;
             }
             break;
-        } else if (responseIterator.hasNext()) {
+        } else {
             ScanVertexResponse next = responseIterator.next();
             if (next != null) {
-                Result processResult = processor.process(connectInfo.getSpaceName(), next);
+                processor = new ScanVertexProcessor(metaClient);
+                Result processResult = processor.process(dataSourceConfig.getNameSpace(), next);
                 dataIterator = process(processResult);
             }
-            continue;
         }
     }
-
     if (dataIterator == null) {
         return false;
     }
@@ -71,25 +74,22 @@
     }
 
     @Override
-    protected Iterator<String[]> process(Result result) {
-        List<String[]> resultValues = new ArrayList<>();
-
-        Map<String, List<com.vesoft.nebula.data.Row>> dataMap = result.getRows();
-        for (Map.Entry<String, List<com.vesoft.nebula.data.Row>> dataEntry : dataMap.entrySet()) {
+    protected Iterator<String> process(Result result) {
+        Map<String, List<Row>> dataMap = result.getRows();
+        for (Map.Entry<String, List<Row>> dataEntry : dataMap.entrySet()) {
             String labelName = dataEntry.getKey();
-            List<Integer> propIndexs = labelPropIndexMap.get(labelName);
-            List<com.vesoft.nebula.data.Row> rows = dataEntry.getValue();
-            for (com.vesoft.nebula.data.Row row : rows) {
-                Iterator<Integer> nameIndexIterator = propIndexs.iterator();
-                String[] fields = new String[propSize + 1];
-                fields[0] = String.valueOf(row.getDefaultProperties()[0].getValue());
+            for (Row row : dataEntry.getValue()) {
+                List<Object> fields = new ArrayList<>();
+                // add default property _vertexId for tag
+                fields.add(String.valueOf(row.getDefaultProperties()[0].getValue()));
                 Property[] properties = row.getProperties();
                 for (int i = 0; i < properties.length; i++) {
-                    fields[nameIndexIterator.next()] = String.valueOf(properties[i].getValue());
+                    fields.add(properties[i].getValue());
                 }
-                resultValues.add(fields);
+                resultValues.put(labelName, fields);
             }
         }
-        return resultValues.iterator();
+        LOGGER.info("tag info ={}", resultValues.toString());
+        return resultValues.keySet().iterator();
     }
 }
diff --git a/tools/nebula-spark/src/main/java/com/vesoft/nebula/util/DataTypeConverter.java b/tools/nebula-spark/src/main/java/com/vesoft/nebula/util/DataTypeConverter.java
new file mode 100644
index 000000000..61dcce756
--- /dev/null
+++ b/tools/nebula-spark/src/main/java/com/vesoft/nebula/util/DataTypeConverter.java
@@ -0,0 +1,18 @@
+package com.vesoft.nebula.util;
+
+import org.apache.spark.sql.types.DataType;
+
+import static org.apache.spark.sql.types.DataTypes.*;
+
+public class DataTypeConverter {
+    public static DataType convertDataType(Class clazz) {
+        if (Boolean.class.equals(clazz)) {
+            return BooleanType;
+        } else if (Long.class.equals(clazz)) {
+            return LongType;
+        } else if (Double.class.equals(clazz)) {
+            return DoubleType;
+        }
+        return StringType;
+    }
+}
diff --git a/tools/nebula-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/tools/nebula-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
new file mode 100644
index 000000000..edeaf4ea1
--- /dev/null
+++ b/tools/nebula-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -0,0 +1,2 @@
+com.vesoft.nebula.reader.DefaultSource
+#com.vesoft.nebula.tools.connector.reader.NebulaRelationProvider
\ No newline at end of file
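
For completeness, a minimal end-to-end sketch assembled from the NebulaReaderExample changes in this patch. The address 127.0.0.1:45500, space "nb", and tag "player" are the example's own test fixtures, and local mode plus the Kryo registration mirror the example's setup; "nebula" is the short name the service file above binds to com.vesoft.nebula.reader.DefaultSource.

    import com.facebook.thrift.protocol.TCompactProtocol;
    import com.vesoft.nebula.bean.Parameters;
    import com.vesoft.nebula.common.Type;
    import org.apache.spark.SparkConf;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    public class NebulaReadSketch {
        public static void main(String[] args) {
            // Nebula's thrift types travel through Spark tasks, so register them with Kryo.
            SparkConf sparkConf = new SparkConf()
                    .setAppName("nebula-read-sketch")
                    .registerKryoClasses(new Class[]{TCompactProtocol.class});
            SparkSession sparkSession = SparkSession.builder()
                    .master("local")
                    .config(sparkConf)
                    .getOrCreate();

            // A blank RETURN_COLS flips DataSourceConfig.allCols to true, so the schema
            // is built from every property of the "player" tag via the meta client.
            Dataset<Row> vertexDataset = sparkSession
                    .read()
                    .format("nebula")
                    .option(Parameters.TYPE, Type.VERTEX.getType())
                    .option(Parameters.HOST_AND_PORTS, "127.0.0.1:45500")
                    .option(Parameters.PARTITION_NUMBER, "100")
                    .option(Parameters.SPACE_NAME, "nb")
                    .option(Parameters.LABEL, "player")
                    .option(Parameters.RETURN_COLS, "")
                    .load();
            vertexDataset.printSchema(); // _vertexId plus the tag's own columns
            vertexDataset.show();
        }
    }

PARTITION_NUMBER sets both the Spark partition count (NebulaRDD.getPartitions) and the stride used to spread Nebula's storage parts across partitions (NebulaPartition.getScanParts), so a value close to the space's part count keeps the scan balanced.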