diff --git a/README.md b/README.md index b9d35ab967..0baa7a509f 100755 --- a/README.md +++ b/README.md @@ -31,12 +31,33 @@ Uses as below import org.apache.spark.sql.TiContext val ti = new TiContext(spark) -// Mapping all TiDB tables from database tpch as Spark SQL tables +// Map all TiDB tables from database tpch as Spark SQL tables ti.tidbMapDatabase("tpch") spark.sql("select count(*) from lineitem").show ``` +## Metadata loading +If you are using spark-shell, you need to manually load schema information as described above. + +If you have too many tables, you might choose to disable histogram preparation so that loading will be faster. + +``` +ti.tidbMapDatabase("tpch", autoLoadStatistics = false) +``` + +If you have two tables with the same name in different databases, you might choose to prepend the database name as a prefix to the table name: + +``` +ti.tidbMapDatabase("tpch", dbNameAsPrefix = true) +``` + +If you have too many tables and use only some of them, you might manually load only the tables you use to speed up the meta loading process: + +``` +ti.tidbMapTable("tpch", "lineitem") +``` + ## Current Version ``` ti.version diff --git a/core/src/main/scala/org/apache/spark/sql/TiContext.scala b/core/src/main/scala/org/apache/spark/sql/TiContext.scala index a734baa8a4..3a3dd07a48 100644 --- a/core/src/main/scala/org/apache/spark/sql/TiContext.scala +++ b/core/src/main/scala/org/apache/spark/sql/TiContext.scala @@ -129,13 +129,15 @@ class TiContext(val session: SparkSession) extends Serializable with Logging { } } - def tidbTable(dbName: String, tableName: String): DataFrame = { + // tidbMapTable does not check any meta information; + // it just registers the table for later use + def tidbMapTable(dbName: String, tableName: String): Unit = { val tiRelation = new TiDBRelation( tiSession, new TiTableReference(dbName, tableName), meta )(sqlContext) - sqlContext.baseRelationToDataFrame(tiRelation) + sqlContext.baseRelationToDataFrame(tiRelation).createTempView(tableName) } def 
tidbMapDatabase(dbName: String,