diff --git a/pom.xml b/pom.xml index 861a61d..65f5300 100644 --- a/pom.xml +++ b/pom.xml @@ -11,7 +11,7 @@ Cascading and Scalding wrapper for HBase with advanced features parallelai parallelai.spyglass - 2.10_0.12.0_5.3.0 + 2.10_0.12.0_5.3.0_beta jar @@ -37,7 +37,7 @@ - 2.10.3 + 2.10.4 2.10 0.12.0 @@ -153,7 +153,17 @@ org.apache.hbase - hbase + hbase-client + ${hbase.version} + + + org.apache.hbase + hbase-common + ${hbase.version} + + + org.apache.hbase + hbase-server ${hbase.version} @@ -180,6 +190,12 @@ 4.1 + + joda-time + joda-time + 2.7 + + org.specs2 diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseInputFormatGranular.java b/src/main/java/parallelai/spyglass/hbase/HBaseInputFormatGranular.java index 332bbd7..aadeef4 100644 --- a/src/main/java/parallelai/spyglass/hbase/HBaseInputFormatGranular.java +++ b/src/main/java/parallelai/spyglass/hbase/HBaseInputFormatGranular.java @@ -14,7 +14,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionLocation; -import org.apache.hadoop.hbase.HServerAddress; +//import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.util.Addressing; @@ -110,16 +110,16 @@ public HBaseTableSplitGranular[] getSplits(JobConf job, int numSplits) throws IO : maxKey; HRegionLocation regionLoc = table.getRegionLocation(keys.getFirst()[i]); - HServerAddress regionServerAddress = regionLoc.getServerAddress(); - InetAddress regionAddress = regionServerAddress.getInetSocketAddress().getAddress(); - String regionLocation; - try { - regionLocation = reverseDNS(regionAddress); - } catch (NamingException e) { - LOG.error("Cannot resolve the host name for " + regionAddress - + " because of " + e); - regionLocation = regionServerAddress.getHostname(); - } +// HServerAddress regionServerAddress = regionLoc.getServerName().getHostname(); +// InetAddress regionAddress = regionServerAddress.getInetSocketAddress().getAddress(); + String regionLocation = regionLoc.getServerName().getHostname(); +// try { +// regionLocation = reverseDNS(regionAddress); +// } catch (NamingException e) { +// LOG.error("Cannot resolve the host name for " + regionAddress +// + " because of " + e); +// regionLocation = regionServerAddress.getHostname(); +// } regionNames[i] = regionLoc.getRegionInfo().getRegionNameAsString(); @@ -189,18 +189,18 @@ public HBaseTableSplitGranular[] getSplits(JobConf job, int numSplits) throws IO byte[] rStart = cRegion.getRegionInfo().getStartKey(); byte[] rStop = cRegion.getRegionInfo().getEndKey(); - HServerAddress regionServerAddress = cRegion - .getServerAddress(); - InetAddress regionAddress = regionServerAddress - .getInetSocketAddress().getAddress(); - String regionLocation; - try { - regionLocation = reverseDNS(regionAddress); - } catch (NamingException e) { - LOG.error("Cannot resolve the host name for " - + regionAddress + " because of " + e); - regionLocation = regionServerAddress.getHostname(); - } +// HServerAddress regionServerAddress = cRegion +// .getServerAddress(); +// InetAddress regionAddress = regionServerAddress +// .getInetSocketAddress().getAddress(); + String regionLocation = cRegion.getHostname(); +// try { +// regionLocation = reverseDNS(regionAddress); +// } catch (NamingException e) { +// LOG.error("Cannot resolve the host name for " +// + regionAddress + " because of " + e); +// regionLocation = regionServerAddress.getHostname(); +// } String regionName = cRegion.getRegionInfo().getRegionNameAsString(); diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java b/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java index efe548d..6eee387 100644 --- a/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java +++ b/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java @@ -156,7 +156,7 @@ public Path getPath() { return new Path(SCHEME + ":/" + tableName.replaceAll(":", "_")); } - protected HBaseAdmin getHBaseAdmin(JobConf conf) throws MasterNotRunningException, ZooKeeperConnectionException { + protected HBaseAdmin getHBaseAdmin(JobConf conf) throws MasterNotRunningException, ZooKeeperConnectionException, IOException { if (hBaseAdmin == null) { Configuration hbaseConf = HBaseConfiguration.create(conf); hBaseAdmin = new HBaseAdmin(hbaseConf); diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseRecordReaderGranular.java b/src/main/java/parallelai/spyglass/hbase/HBaseRecordReaderGranular.java index b34ed3f..7abebc2 100644 --- a/src/main/java/parallelai/spyglass/hbase/HBaseRecordReaderGranular.java +++ b/src/main/java/parallelai/spyglass/hbase/HBaseRecordReaderGranular.java @@ -249,9 +249,10 @@ public boolean next(ImmutableBytesWritable key, Result value) } else { key.set(result.getRow()); } - + lastSuccessfulRow = key.get(); - Writables.copyWritable(result, value); + value.copyFrom(result); + //Writables.copyWritable(result, value); // src , targer return true; } return false; @@ -302,7 +303,8 @@ public boolean next(ImmutableBytesWritable key, Result value) key.set(result.getRow()); } lastSuccessfulRow = key.get(); - Writables.copyWritable(result, value); + value.copyFrom(result); +// Writables.copyWritable(result, value); return true; } else { @@ -352,7 +354,8 @@ public boolean next(ImmutableBytesWritable key, Result value) key.set(result.getRow()); } lastSuccessfulRow = key.get(); - Writables.copyWritable(result, value); + value.copyFrom(result); +// Writables.copyWritable(result, value); return true; } else { @@ -408,7 +411,8 @@ public boolean next(ImmutableBytesWritable key, Result value) key.set(result.getRow()); } lastSuccessfulRow = key.get(); - Writables.copyWritable(result, value); + value.copyFrom(result); +// Writables.copyWritable(result, value); return true; } else { LOG.debug(String.format("+ Key (%s) return an EMPTY result. Get (%s)", Bytes.toString(nextKey), theGet.getId()) ); //alg0 diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseTap.java b/src/main/java/parallelai/spyglass/hbase/HBaseTap.java index 3576e96..07e0945 100644 --- a/src/main/java/parallelai/spyglass/hbase/HBaseTap.java +++ b/src/main/java/parallelai/spyglass/hbase/HBaseTap.java @@ -143,7 +143,7 @@ public Path getPath() { return new Path(SCHEME + ":/" + tableName.replaceAll(":", "_")); } - protected HBaseAdmin getHBaseAdmin(JobConf conf) throws MasterNotRunningException, ZooKeeperConnectionException { + protected HBaseAdmin getHBaseAdmin(JobConf conf) throws MasterNotRunningException, ZooKeeperConnectionException, IOException { if (hBaseAdmin == null) { Configuration hbaseConf = HBaseConfiguration.create(conf); hBaseAdmin = new HBaseAdmin(hbaseConf); diff --git a/src/main/resources/pom.xml b/src/main/resources/pom.xml index d0fa51a..0323209 100644 --- a/src/main/resources/pom.xml +++ b/src/main/resources/pom.xml @@ -8,7 +8,7 @@ Cascading and Scalding wrapper for HBase with advanced features parallelai parallelai.spyglass - 2.10_0.10_4.4 + 2.10_0.12_5.3.0_beta jar @@ -68,26 +68,26 @@ com.twitter scalding-core_2.10 - 0.10.0 + 0.12.0 org.apache.hadoop hadoop-core - 2.0.0-mr1-cdh4.5.0 + 2.5.0-mr1-cdh5.3.0 org.apache.hadoop hadoop-common - 2.0.0-cdh4.5.0 + 2.5.0-cdh5.3.0 org.apache.hbase hbase - 0.94.6-cdh4.5.0 + 0.98.6-cdh5.3.0 diff --git a/src/main/scala/parallelai/spyglass/hbase/example/HBaseExampleRunner.scala b/src/main/scala/parallelai/spyglass/hbase/example/HBaseExampleRunner.scala index c503247..e0d171b 100644 --- a/src/main/scala/parallelai/spyglass/hbase/example/HBaseExampleRunner.scala +++ b/src/main/scala/parallelai/spyglass/hbase/example/HBaseExampleRunner.scala @@ -7,7 +7,6 @@ import parallelai.spyglass.base.JobRunner import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, HBaseConfiguration} import org.apache.hadoop.hbase.client.{Put, HTable, HConnectionManager, HBaseAdmin} -import org.apache.hadoop.hbase.io.hfile.Compression import org.apache.hadoop.hbase.regionserver.StoreFile import org.apache.hadoop.hbase.util.Bytes import parallelai.spyglass.hbase.HBaseSalter @@ -71,4 +70,4 @@ object HBaseExampleRunner extends App { "--output", output, "--debug", "true", "--job.lib.path", jobLibPath, "--quorum", quorum)) -} \ No newline at end of file +}