diff --git a/.gitignore b/.gitignore
index 225dd761a7b8..ded7f5cd729d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@
*.iml
*.orig
*~
+patchprocess/
diff --git a/CHANGES.txt b/CHANGES.txt
index 52d21202fe9c..68b6b2eacf66 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,7 +1,1948 @@
HBase Change Log
-Release 0.93.0 - Unreleased
- *DO NOT ADD ISSUES HERE ON COMMIT ANY MORE. WE'LL GENERATE THE LIST
- FROM JIRA INSTEAD WHEN WE MAKE A RELEASE*
+Release 0.94.28 - 01/04/2016
+Sub-task
+
+ [HBASE-14748] - Update 0.94 apidocs and xref on website
+
+Bug
+
+ [HBASE-12921] - Port HBASE-5356 'region_mover.rb can hang if table region it belongs to is deleted' to 0.94
+ [HBASE-13454] - SecureClient#setupIOStreams should handle all Exceptions
+ [HBASE-13651] - Handle StoreFileScanner FileNotFoundException
+ [HBASE-14799] - Commons-collections object deserialization remote command execution vulnerability
+ [HBASE-14830] - Fix broken links in 0.94 generated docs
+ [HBASE-15054] - Allow 0.94 to compile with JDK8
+ [HBASE-15059] - Allow 0.94 to compile against Hadoop 2.7.x
+
+Improvement
+
+ [HBASE-13344] - Add enforcer rule that matches our JDK support statement
+
+Task
+
+ [HBASE-14747] - Make it possible to build Javadoc and xref reports for 0.94 again
+
+Release 0.94.27 - 03/18/2015
+Sub-task
+
+ [HBASE-12776] - SplitTransaction: Log number of files to be split
+
+Bug
+
+ [HBASE-10528] - DefaultBalancer selects plans to move regions onto draining nodes
+ [HBASE-12792] - [backport] HBASE-5835: Catch and handle NotServingRegionException when close region attempt fails
+ [HBASE-12801] - Failed to truncate a table while maintaining binary region boundaries
+ [HBASE-12968] - [0.94] SecureServer should not ignore CallQueueSize
+ [HBASE-13039] - Add patchprocess/* to .gitignore to fix builds of branches
+ [HBASE-13131] - ReplicationAdmin leaks connections if there's an error in the constructor
+ [HBASE-13229] - Specify bash for local-regionservers.sh and local-master-backup.sh
+
+Improvement
+
+ [HBASE-11195] - Potentially improve block locality during major compaction for old regions
+ [HBASE-12223] - MultiTableInputFormatBase.getSplits is too slow
+ [HBASE-12720] - Make InternalScan LimitedPrivate
+
+Task
+
+ [HBASE-13020] - Add 'patchprocess/*' to RAT excludes on all branches
+
+
+Release 0.94.26 - 12/16/2014
+Bug
+
+ [HBASE-12279] - Generated thrift files were generated with the wrong parameters
+ [HBASE-12491] - TableMapReduceUtil.findContainingJar() NPE
+ [HBASE-12635] - Delete acl notify znode of table after the table is deleted
+ [HBASE-12657] - The Region is not being split and far exceeds the desired maximum size.
+ [HBASE-12692] - NPE from SnapshotManager#stop
+
+Release 0.94.25 - 11/7/2014
+Bug
+
+ [HBASE-12039] - Lower log level for TableNotFoundException log message when throwing
+ [HBASE-12065] - Import tool is not restoring multiple DeleteFamily markers of a row
+ [HBASE-12146] - RegionServerTracker should escape data in log messages
+ [HBASE-12171] - Backport: PerformanceEvaluation: getSplits doesn't provide right splits.
+ [HBASE-12336] - RegionServer failed to shutdown for NodeFailoverWorker thread
+ [HBASE-12376] - HBaseAdmin leaks ZK connections if failure starting watchers (ConnectionLossException)
+
+Improvement
+
+ [HBASE-12272] - Generate Thrift code through maven
+
+Task
+
+ [HBASE-12235] - Backport to 0.94: HBASE-9002 TestDistributedLogSplitting.testRecoverdEdits should test correct region
+ [HBASE-12381] - Add maven enforcer rules for build assumptions
+
+
+Release 0.94.24 - 09/29/2014
+Sub-task
+
+ [HBASE-11923] - Potential race condition in RecoverableZookeeper.checkZk()
+ [HBASE-11963] - Synchronize peer cluster replication connection attempts
+ [HBASE-12023] - HRegion.applyFamilyMapToMemstore creates too many iterator objects.
+ [HBASE-12077] - FilterLists create many ArrayList$Itr objects per row.
+
+Bug
+
+ [HBASE-11405] - Multiple invocations of hbck in parallel disables balancer permanently
+ [HBASE-11957] - Backport to 0.94 HBASE-5974 Scanner retry behavior with RPC timeout on next() seems incorrect
+ [HBASE-12019] - hbase-daemon.sh overwrites HBASE_ROOT_LOGGER and HBASE_SECURITY_LOGGER variables
+ [HBASE-12020] - String formatting on each RPC Invoke
+ [HBASE-12022] - Payloads on Failure attempt to serialize the byte[] into strings.
+ [HBASE-12114] - Meta table cache hashing may access the wrong table
+
+Improvement
+
+ [HBASE-12090] - Bytes: more Unsafe, more Faster
+
+Task
+
+ [HBASE-12103] - Backport HFileV1Detector to 0.94
+ [HBASE-12113] - Backport to 0.94: HBASE-5525 Truncate and preserve region boundaries option
+
+
+Release 0.94.23 - 08/26/2014
+Bug
+
+ [HBASE-9746] - RegionServer can't start when replication tries to replicate to an unknown host
+ [HBASE-10834] - Better error messaging on issuing grant commands in non-authz mode
+ [HBASE-11232] - Add MultiRowMutation tests.
+ [HBASE-11536] - Puts of region location to Meta may be out of order which causes inconsistency of region location
+ [HBASE-11641] - TestDistributedLogSplitting.testMasterStartsUpWithLogSplittingWork fails frequently
+ [HBASE-11652] - Port HBASE-3270 and HBASE-11650 to 0.94 - create cluster id and version file in a tmp location and move it into place
+ [HBASE-11767] - [0.94] Unnecessary garbage produced by schema metrics during scanning
+
+Improvement
+
+ [HBASE-11667] - Comment ClientScanner logic for NSREs.
+ [HBASE-11754] - [Shell] Record table property SPLITS_FILE in descriptor
+
+Task
+
+ [HBASE-11690] - Backport HBASE-5934 (Add the ability for Performance Evaluation to set the table compression) to 0.94
+ [HBASE-11691] - Backport HBASE-7156 (Add Data Block Encoding and -D opts to Performance Evaluation) to 0.94
+ [HBASE-11693] - Backport HBASE-11026 (Provide option to filter out all rows in PerformanceEvaluation tool) to 0.94
+
+
+Release 0.94.22 - 07/31/2014
+Bug
+
+ [HBASE-10645] - Fix wrapping of request count RegionServer-level metrics
+ [HBASE-11360] - SnapshotFileCache causes too many cache refreshes
+ [HBASE-11479] - SecureConnection can't be closed when SecureClient is stopping because InterruptedException won't be caught in SecureClient#setupIOstreams()
+ [HBASE-11496] - HBASE-9745 broke cygwin CLASSPATH translation
+ [HBASE-11552] - Read/Write requests count metric value is too short
+ [HBASE-11565] - Stale connection could stay for a while
+ [HBASE-11633] - [0.94] port HBASE-11217 Race between SplitLogManager task creation + TimeoutMonitor
+
+Improvement
+
+ [HBASE-2217] - VM OPTS for shell only
+ [HBASE-7910] - Don't use reflection for security
+ [HBASE-11444] - Remove use of reflection for User#getShortName
+ [HBASE-11450] - Improve file size info in SnapshotInfo tool
+ [HBASE-11480] - ClientScanner might not close the HConnection created in construction
+ [HBASE-11623] - mutateRowsWithLocks might require updatesLock.readLock with waitTime=0
+
+
+Release 0.94.21 - 06/27/2014
+Bug
+
+ [HBASE-10692] - The Multi TableMap job doesn't support the secure HBase cluster
+ [HBASE-11052] - Sending random data crashes thrift service
+ [HBASE-11096] - stop method of Master and RegionServer coprocessor is not invoked
+ [HBASE-11234] - FastDiffDeltaEncoder#getFirstKeyInBlock returns wrong result
+ [HBASE-11341] - ZKProcedureCoordinatorRpcs should respond only to members
+ [HBASE-11414] - Backport to 0.94: HBASE-7711 rowlock release problem with thread interruptions in batchMutate
+
+Improvement
+
+ [HBASE-8495] - Change ownership of the directory to bulk load
+ [HBASE-10871] - Indefinite OPEN/CLOSE wait on busy RegionServers
+
+New Feature
+
+ [HBASE-10935] - support snapshot policy where memstore flush can be skipped to prevent production cluster freeze
+
+
+Release 0.94.20 - 05/23/2014
+Sub-task
+
+ [HBASE-10936] - Add zeroByte encoding test
+
+Bug
+
+ [HBASE-10958] - [dataloss] Bulk loading with seqids can prevent some log entries from being replayed
+ [HBASE-11110] - Ability to load FilterList class is dependent on context classloader
+ [HBASE-11143] - Improve replication metrics
+ [HBASE-11188] - "Inconsistent configuration" for SchemaMetrics is always shown
+ [HBASE-11212] - Fix increment index in KeyValueSortReducer
+ [HBASE-11225] - Backport fix for HBASE-10417 'index is not incremented in PutSortReducer#reduce()'
+ [HBASE-11247] - [0.94] update maven-site-plugin to 3.3
+
+Improvement
+
+ [HBASE-11008] - Align bulk load, flush, and compact to require Action.CREATE
+ [HBASE-11119] - Update ExportSnapShot to optionally not use a tmp file on external file system
+ [HBASE-11128] - Add -target option to ExportSnapshot to export with a different name
+ [HBASE-11134] - Add a -list-snapshots option to SnapshotInfo
+
+
+Release 0.94.19 - 04/21/2014
+Bug
+
+ [HBASE-10118] - Major compact keeps deletes with future timestamps
+ [HBASE-10312] - Flooding the cluster with administrative actions leads to collapse
+ [HBASE-10533] - commands.rb is giving wrong error messages on exceptions
+ [HBASE-10766] - SnapshotCleaner allows to delete referenced files
+ [HBASE-10805] - Speed up KeyValueHeap.next() a bit
+ [HBASE-10807] - -ROOT- still stale in table.jsp if it moved
+ [HBASE-10845] - Memstore snapshot size isn't updated in DefaultMemStore#rollback()
+ [HBASE-10847] - 0.94: drop non-secure builds, make security the default
+ [HBASE-10848] - Filter SingleColumnValueFilter combined with NullComparator does not work
+ [HBASE-10966] - RowCounter misinterprets column names that have colons in their qualifier
+ [HBASE-10991] - Port HBASE-10639 'Unload script displays wrong counts (off by one) when unloading regions' to 0.94
+ [HBASE-11003] - ExportSnapshot is using the wrong fs when staging dir is not in fs.defaultFS
+ [HBASE-11030] - HBaseTestingUtility.getMiniHBaseCluster should be able to return null
+
+Task
+
+ [HBASE-10921] - Port HBASE-10323 'Auto detect data block encoding in HFileOutputFormat' to 0.94 / 0.96
+
+Test
+
+ [HBASE-10782] - Hadoop2 MR tests fail occasionally because mapreduce.jobhistory.address is not set in job conf
+ [HBASE-10969] - TestDistributedLogSplitting fails frequently in 0.94.
+ [HBASE-10982] - TestZKProcedure.testMultiCohortWithMemberTimeoutDuringPrepare fails frequently in 0.94
+ [HBASE-10987] - Increase timeout in TestZKLeaderManager.testLeaderSelection
+ [HBASE-10988] - Properly wait for server in TestThriftServerCmdLine
+ [HBASE-10989] - TestAccessController needs better timeout
+ [HBASE-10996] - TestTableSnapshotInputFormatScan fails frequently on 0.94
+ [HBASE-11010] - TestChangingEncoding is unnecessarily slow
+ [HBASE-11017] - TestHRegionBusyWait.testWritesWhileScanning fails frequently in 0.94
+ [HBASE-11022] - Increase timeout for TestHBaseFsck.testSplitDaughtersNotInMeta
+ [HBASE-11024] - TestSecureLoadIncrementalHFilesSplitRecovery should wait longer for ACL table
+ [HBASE-11029] - Increase wait in TestSplitTransactionOnCluster.split
+ [HBASE-11037] - Race condition in TestZKBasedOpenCloseRegion
+ [HBASE-11040] - TestAccessController, TestAccessControllerFilter, and TestTablePermissions need to wait longer for ACL table
+ [HBASE-11042] - TestForceCacheImportantBlocks OOMs occasionally in 0.94
+
+
+Release 0.94.18 - 03/14/2014
+Bug
+
+ [HBASE-9708] - Improve Snapshot Name Error Message
+ [HBASE-9778] - Add hint to ExplicitColumnTracker to avoid seeking
+ [HBASE-10514] - Forward port HBASE-10466, possible data loss when failed flushes
+ [HBASE-10549] - When there is a hole, LoadIncrementalHFiles will hang in an infinite loop.
+ [HBASE-10575] - ReplicationSource thread can't be terminated if it runs into the loop to contact peer's zk ensemble and fails continuously
+ [HBASE-10583] - backport HBASE-8402 to 0.94 - ScanMetrics depends on number of rpc calls to the server.
+ [HBASE-10594] - Speed up TestRestoreSnapshotFromClient
+ [HBASE-10598] - Written data can not be read out because MemStore#timeRangeTracker might be updated concurrently
+ [HBASE-10614] - Master could not be stopped
+ [HBASE-10622] - Improve log and Exceptions in Export Snapshot
+ [HBASE-10624] - Fix 2 new findbugs warnings introduced by HBASE-10598
+ [HBASE-10627] - A logic mistake in HRegionServer isHealthy
+ [HBASE-10631] - Avoid extra seek on FileLink open
+ [HBASE-10642] - Add M/R over snapshots to 0.94
+ [HBASE-10669] - [hbck tool] Usage is wrong for hbck tool for -sidelineCorruptHfiles option
+ [HBASE-10682] - region_mover.rb throws "can't convert nil into String" for regions moved
+ [HBASE-10712] - Backport HBASE-8304 to 0.94 and 0.96
+ [HBASE-10716] - [Configuration]: hbase.regionserver.region.split.policy should be part of hbase-default.xml
+ [HBASE-10718] - TestHLogSplit fails when it sets a KV size to be negative
+ [HBASE-10722] - [0.94] HRegion.computeHDFSBlocksDistribution does not account for links and reference files.
+ [HBASE-10731] - Fix environment variable typos in scripts
+ [HBASE-10738] - AssignmentManager should shut down executors on stop
+ [HBASE-10745] - Access to ShutdownHook#fsShutdownHooks should be synchronized
+ [HBASE-10751] - TestHRegion testWritesWhileScanning occasional fail since HBASE-10514 went in
+
+Improvement
+
+ [HBASE-8604] - improve reporting of incorrect peer address in replication
+
+Test
+
+ [HBASE-9914] - Port fix for HBASE-9836 'Intermittent TestRegionObserverScannerOpenHook#testRegionObserverCompactionTimeStacking failure' to 0.94
+
+
+Release 0.94.17 - 02/18/2014
+Bug
+
+ [HBASE-7963] - HBase VerifyReplication not working when security enabled
+ [HBASE-10249] - TestReplicationSyncUpTool fails because failover takes too long
+ [HBASE-10274] - MiniZookeeperCluster should close ZKDatabase when shutdown ZooKeeperServers
+ [HBASE-10319] - HLog should roll periodically to allow DN decommission to eventually complete.
+ [HBASE-10320] - Avoid ArrayList.iterator() ExplicitColumnTracker
+ [HBASE-10335] - AuthFailedException in zookeeper may block replication forever
+ [HBASE-10340] - [BACKPORT] HBASE-9892 Add info port to ServerName to support multi instances in a node
+ [HBASE-10363] - [0.94] TestInputSampler and TestInputSamplerTool fail under hadoop 2.0/23 profiles.
+ [HBASE-10371] - Compaction creates empty hfile, then selects this file for compaction and creates empty hfile, over and over again
+ [HBASE-10383] - Secure Bulk Load for 'completebulkload' fails for version 0.94.15
+ [HBASE-10400] - [hbck] Continue if region dir missing on region merge attempt
+ [HBASE-10401] - [hbck] perform overlap group merges in parallel
+ [HBASE-10448] - ZKUtil create and watch methods don't set watch in some cases
+ [HBASE-10470] - Import generates huge log file while importing large amounts of data
+ [HBASE-10481] - API Compatibility JDiff script does not properly handle arguments in reverse order
+ [HBASE-10482] - ReplicationSyncUp doesn't clean up its ZK, needed for tests
+ [HBASE-10485] - PrefixFilter#filterKeyValue() should perform filtering on row key
+ [HBASE-10489] - TestImportExport fails in 0.94 with Hadoop2
+ [HBASE-10493] - InclusiveStopFilter#filterKeyValue() should perform filtering on row key
+ [HBASE-10501] - Improve IncreasingToUpperBoundRegionSplitPolicy to avoid too many regions
+ [HBASE-10505] - Import.filterKv does not call Filter.filterRowKey
+ [HBASE-10506] - Fail-fast if client connection is lost before the real call is executed in RPC layer
+ [HBASE-10508] - Backport HBASE-10365 'HBaseFsck should clean up connection properly when repair is completed' to 0.94 and 0.96
+ [HBASE-10539] - HRegion.addAndGetGlobalMemstoreSize returns previous size
+ [HBASE-10545] - RS hangs waiting on region to close on shutdown; has to time out before it can go down
+ [HBASE-10546] - Two scanner objects are open for each hbase map task but only one scanner object is closed
+ [HBASE-10551] - Change local mode back to one RS thread by default
+ [HBASE-10552] - HFilePerformanceEvaluation.GaussianRandomReadBenchmark fails sometimes.
+ [HBASE-10555] - Backport HBASE-8519 to 0.94, Backup master will never come up if primary master dies during initialization
+ [HBASE-10562] - Fix TestMultiTableInputFormat for Hadoop 2 in 0.94
+
+Improvement
+
+ [HBASE-10212] - New rpc metric: number of active handler
+ [HBASE-10423] - Report back the message of split or rollback failure to the master
+ [HBASE-10457] - Print corrupted file information in SnapshotInfo tool without -file option
+
+Task
+
+ [HBASE-10473] - Add utility for adorning http Context
+
+Test
+
+ [HBASE-10480] - TestLogRollPeriod#testWithEdits may fail due to insufficient waiting
+
+
+Release 0.94.16 - 01/10/2014
+Sub-task
+
+ [HBASE-10257] - [0.94] Master aborts due to assignment race
+
+Bug
+
+ [HBASE-7226] - HRegion.checkAndMutate uses incorrect comparison result for <, <=, > and >=
+ [HBASE-8558] - Add timeout limit for HBaseClient dataOutputStream
+ [HBASE-8912] - [0.94] AssignmentManager throws IllegalStateException from PENDING_OPEN to OFFLINE
+ [HBASE-9346] - HBCK should provide an option to check if regions boundaries are the same in META and in stores.
+ [HBASE-10078] - Dynamic Filter - Not using DynamicClassLoader when using FilterList
+ [HBASE-10193] - Cleanup HRegion if one of the store fails to open at region initialization
+ [HBASE-10214] - Regionserver shutdown improperly and leaves the dir in .old not deleted
+ [HBASE-10215] - TableNotFoundException should be thrown after removing stale znode in ETH
+ [HBASE-10225] - Bug in calls to RegionObserver.postScannerFilterRow
+ [HBASE-10250] - [0.94] TestHLog fails occasionally
+ [HBASE-10268] - TestSplitLogWorker occasionally fails
+ [HBASE-10272] - Cluster becomes nonoperational if the node hosting the active Master AND ROOT/META table goes offline
+ [HBASE-10273] - AssignmentManager.regions and AssignmentManager.servers are not always updated in tandem
+ [HBASE-10279] - TestStore.testDeleteExpiredStoreFiles is flaky
+ [HBASE-10281] - TestMultiParallel.testFlushCommitsNoAbort fails frequently in 0.94
+ [HBASE-10284] - Build broken with svn 1.8
+ [HBASE-10286] - Revert HBASE-9593, breaks RS wildcard addresses
+ [HBASE-10306] - Backport HBASE-6820 to 0.94, MiniZookeeperCluster should ensure that ZKDatabase is closed upon shutdown()
+
+Improvement
+
+ [HBASE-10285] - Allow for configurable policies in ChaosMonkey
+
+Test
+
+ [HBASE-10259] - [0.94] Upgrade JUnit to 4.11
+
+
+Release 0.94.15 - 12/17/2013
+Bug
+
+ [HBASE-7886] - [replication] hlog zk node will not be deleted if client rolls hlog
+ [HBASE-9485] - TableOutputCommitter should implement recovery if we don't want jobs to start from 0 on RM restart
+ [HBASE-9995] - Not stopping ReplicationSink when using custom implementation for the ReplicationSink
+ [HBASE-10014] - HRegion#doMiniBatchMutation rollbacks the memstore even if there is nothing to rollback.
+ [HBASE-10015] - Replace intrinsic locking with explicit locks in StoreScanner
+ [HBASE-10026] - HBaseAdmin#createTable could fail if region splits too fast
+ [HBASE-10046] - Unmonitored HBase service could accumulate Status objects and OOM
+ [HBASE-10057] - TestRestoreFlushSnapshotFromClient and TestRestoreSnapshotFromClient fail to finish occasionally
+ [HBASE-10061] - TableMapReduceUtil.findOrCreateJar calls updateMap(null, ) resulting in thrown NPE
+ [HBASE-10064] - AggregateClient.validateParameters can throw NPE
+ [HBASE-10089] - Metrics intern table names cause eventual permgen OOM in 0.94
+ [HBASE-10111] - Verify that a snapshot is not corrupted before restoring it
+ [HBASE-10112] - Hbase rest query params for maxVersions and maxValues are not parsed
+ [HBASE-10117] - Avoid synchronization in HRegionScannerImpl.isFilterDone
+ [HBASE-10120] - start-hbase.sh doesn't respect --config in non-distributed mode
+ [HBASE-10179] - HRegionServer underreports readRequestCounts by 1 under certain conditions
+ [HBASE-10181] - HBaseObjectWritable.readObject catches DoNotRetryIOException and wraps it back in a regular IOException
+
+Improvement
+
+ [HBASE-9931] - Optional setBatch for CopyTable to copy large rows in batches
+ [HBASE-10001] - Add a coprocessor to help testing the performances without taking into account the i/o
+ [HBASE-10007] - PerformanceEvaluation: Add sampling and latency collection to randomRead test
+ [HBASE-10010] - eliminate the put latency spike at the beginning of a new log file
+ [HBASE-10048] - Add hlog number metric in regionserver
+ [HBASE-10049] - Small improvements in region_mover.rb
+ [HBASE-10093] - Unregister ReplicationSource metric bean when the replication source thread is terminated
+
+New Feature
+
+ [HBASE-9047] - Tool to handle finishing replication when the cluster is offline
+ [HBASE-10119] - Allow HBase coprocessors to clean up when they fail
+
+Task
+
+ [HBASE-9927] - ReplicationLogCleaner#stop() calls HConnectionManager#deleteConnection() unnecessarily
+ [HBASE-9986] - Incorporate HTTPS support for HBase (0.94 port)
+
+Test
+
+ [HBASE-10058] - Test for HBASE-9915 (avoid reading index blocks)
+ [HBASE-10189] - Intermittent TestReplicationSyncUpTool failure
+
+
+Release 0.94.14 - 11/18/2013
+Sub-task
+
+ [HBASE-9165] - Improvements to addDependencyJars
+
+Bug
+
+ [HBASE-9138] - getHaseIntegrationTestingUtility() is misspelled
+ [HBASE-9799] - Change Hadoop 1.2 dependency to 1.2.1
+ [HBASE-9809] - RegionTooBusyException should provide region name which was too busy
+ [HBASE-9834] - Minimize byte[] copies for 'smart' clients
+ [HBASE-9849] - [REST] Forbidden schema delete in read only mode
+ [HBASE-9850] - Issues with UI for table compact/split operation completion. After split/compaction operation using UI, the page is not automatically redirecting back using IE8/Firefox.
+ [HBASE-9865] - Reused WALEdits in replication may cause RegionServers to go OOM
+ [HBASE-9872] - ModifyTable does not modify the attributes of a newly modified/changed ColumnDescriptor
+ [HBASE-9890] - MR jobs are not working if started by a delegated user
+ [HBASE-9902] - Region Server is starting normally even if clock skew is more than the default 30 seconds (or any configured value), i.e. RegionServer node time is greater than master node time
+ [HBASE-9906] - Restore snapshot fails to restore the meta edits sporadically
+ [HBASE-9915] - Performance: isSeeked() in EncodedScannerV2 always returns false
+ [HBASE-9952] - Snapshot restore may fail due to NullPointerException
+ [HBASE-9956] - Remove keyLength cache from KeyValue
+ [HBASE-9970] - HBase BulkLoad: table is created with the timestamp key also as a column of the table.
+ [HBASE-9971] - Port part of HBASE-9958 to 0.94 - change lock scope in locateRegion
+ [HBASE-9975] - Not starting ReplicationSink when using custom implementation for the ReplicationSink.
+ [HBASE-9993] - 0.94: HBASE-9865 breaks coprocessor compatibility with WALEdit.
+
+Improvement
+
+ [HBASE-4654] - [replication] Add a check to make sure we don't replicate to ourselves
+ [HBASE-8438] - Extend bin/hbase to print a "mapreduce classpath"
+ [HBASE-9715] - Backport -in_memory option support for LoadTestTool from trunk
+ [HBASE-9831] - 'hbasefsck.numthreads' property isn't passed to hbck via cmdline -D option
+ [HBASE-9894] - remove the inappropriate assert statement in Store.getSplitPoint()
+ [HBASE-9963] - Remove the ReentrantReadWriteLock in the MemStore
+
+Test
+
+ [HBASE-8397] - improve unit-test coverage of package org.apache.hadoop.hbase.master.metrics (0.94)
+ [HBASE-8543] - fix coverage org.apache.hadoop.hbase.rest.client
+ [HBASE-8552] - fix coverage org.apache.hadoop.hbase.rest.filter
+ [HBASE-8556] - fix coverage org.apache.hadoop.hbase.metrics.histogram
+ [HBASE-8557] - fix coverage org.apache.hadoop.hbase.rest.metrics
+ [HBASE-8559] - increase unit-test coverage of package org.apache.hadoop.hbase.coprocessor
+
+
+Release 0.94.13 - 10/29/2013
+Sub-task
+
+ [HBASE-9711] - Improve HBASE-9428 - avoid copying bytes for RegexFilter unless necessary
+
+Bug
+
+ [HBASE-7600] - TestAdmin.testCreateBadTables is failing occasionally
+ [HBASE-8521] - Cells cannot be overwritten with bulk loaded HFiles
+ [HBASE-9430] - Memstore heapSize calculation - DEEP_OVERHEAD is incorrect
+ [HBASE-9504] - Backport HBASE-1212 to 0.94
+ [HBASE-9548] - Cleanup SnapshotTestingUtils
+ [HBASE-9607] - Data loss after snapshot restore into cloned table
+ [HBASE-9649] - HFilePrettyPrinter should not throw a NPE if FirstKey or LastKey is null.
+ [HBASE-9651] - Backport HBASE-3890 'Scheduled tasks in distributed log splitting not in sync with ZK' to 0.94
+ [HBASE-9727] - HBase Rest Server - DELETE scanner operation is a no-op
+ [HBASE-9731] - updatesBlockedSeconds RegionServer metric should not be a histogram
+ [HBASE-9732] - Static AtomicLong updated in StoreFileScanner every (re)seek
+ [HBASE-9737] - Corrupt HFile cause resource leak leading to Region Server OOM
+ [HBASE-9745] - Append HBASE_CLASSPATH to end of Java classpath and use another env var for prefix
+ [HBASE-9747] - PrefixFilter with OR condition gives wrong results
+ [HBASE-9751] - Excessive readpoints checks in StoreFileScanner
+ [HBASE-9753] - Excessive readpoint checks in MemstoreScanner
+ [HBASE-9783] - o.a.h.h.r.HRegion.mutateRow() with non-existent CF cause NPE
+ [HBASE-9789] - Change logging for Coprocessor exec call to trace
+ [HBASE-9807] - block encoder unnecessarily copies the key for each reseek
+ [HBASE-9842] - Backport HBASE-9593 and HBASE-8667 to 0.94
+ [HBASE-9847] - HConnectionImplementation does not connect to new active master
+
+Improvement
+
+ [HBASE-9221] - Provide interface for getting a User in the client
+ [HBASE-9488] - Improve performance for small scan
+ [HBASE-9716] - LoadTestTool should provide default min and max settings to the data generator
+ [HBASE-9749] - Custom threadpool for Coprocessor obtained HTables
+
+Task
+
+ [HBASE-9819] - Backport HBASE-8372 'Provide mutability to CompoundConfiguration' to 0.94
+
+Test
+
+ [HBASE-8553] - improve unit-test coverage of package org.apache.hadoop.hbase.mapreduce.hadoopbackport
+ [HBASE-9851] - TestHBaseFsck.testQuarantineMissingHFile is flaky
+ [HBASE-9852] - TestRpcMetrics.testCustomMetrics is flaky
+
+
+Release 0.94.12 - 9/19/2013
+Sub-task
+
+ [HBASE-9277] - REST should use listTableNames to list tables
+ [HBASE-9279] - Thrift should use listTableNames to list tables
+
+Bug
+
+ [HBASE-7709] - Infinite loop possible in Master/Master replication
+ [HBASE-7954] - Fix the retrying logic of memstore flushes to avoid extra sleep
+ [HBASE-8760] - possible loss of data in snapshot taken after region split
+ [HBASE-8930] - Filter evaluates KVs outside requested columns
+ [HBASE-9167] - ServerCallable retries just once if timeout is not integer.max
+ [HBASE-9182] - Allow non-admin users to list all table names
+ [HBASE-9195] - Fix TestFSHDFSUtils against java7 test re-ordering
+ [HBASE-9207] - An Offline SplitParent Region can be assigned breaking split references
+ [HBASE-9231] - Multipage book is generated to the wrong location
+ [HBASE-9233] - isTableAvailable() may be stuck if an offline parent was never assigned
+ [HBASE-9252] - HConnectionManager#getZooKeeperWatcher() should be deprecated in 0.94
+ [HBASE-9256] - HBaseClient#setupIOStreams should handle all exceptions
+ [HBASE-9266] - Javadoc: Document that HBaseAdmin.flush(...) is synchronous
+ [HBASE-9286] - [0.94] ageOfLastShippedOp replication metric doesn't update if the slave regionserver is stalled
+ [HBASE-9301] - Default hbase.dynamic.jars.dir to hbase.rootdir/jars
+ [HBASE-9303] - Snapshot restore of table which splits after snapshot was taken encounters 'Region is not online'
+ [HBASE-9326] - ServerName is created using getLocalSocketAddress, breaks binding to the wildcard address. Revert HBASE-8640
+ [HBASE-9329] - SnapshotManager should check for directory existence before throwing a warning.
+ [HBASE-9344] - RegionServer not shutting down upon KeeperException in open region
+ [HBASE-9356] - [0.94] SecureServer.INSECURE_VERSIONS is declared incorrectly
+ [HBASE-9397] - Snapshots with the same name are allowed to proceed concurrently
+ [HBASE-9415] - In rpcServer, replicationQueue is initialized with the max queue size instead of the max queue length
+ [HBASE-9428] - Regex filters are at least an order of magnitude slower since 0.94.3
+ [HBASE-9429] - Add back MetaScanner.allTableRegions(Configuration conf,byte[] tablename,boolean offlined) method
+ [HBASE-9432] - Backport HBASE-8781 to 0.94
+ [HBASE-9448] - [0.94] Shell needs to fall back after HBASE-9182 if talking to older servers
+ [HBASE-9455] - Port HBASE-7113 'TestGzipFilter is flaky with jdk1.7' to 0.94
+ [HBASE-9468] - Previous active master can still serves RPC request when it is trying recovering expired zk session
+ [HBASE-9482] - Do not enforce secure Hadoop for secure HBase
+ [HBASE-9506] - [0.94] Backport HBASE-9309 The links in the backup masters template are bad
+ [HBASE-9534] - Short-Circuit Coprocessor HTable access when on the same server
+ [HBASE-9566] - Add back WALEdit#get/setScopes method
+ [HBASE-9584] - Short-Circuit Coprocessor doesn't correctly lookup table when on server
+
+Improvement
+
+ [HBASE-9243] - Add more useful statistics in the HFile tool
+ [HBASE-9314] - Dropping a table always prints a TableInfoMissingException in the master log
+
+Task
+
+ [HBASE-9153] - Introduce/update a script to generate jdiff reports
+ [HBASE-9377] - Backport HBASE-9208 "ReplicationLogCleaner slow at large scale"
+
+Test
+
+ [HBASE-9287] - TestCatalogTracker depends on the execution order
+
+
+Release 0.94.11 - 8/13/2013
+Sub-task
+
+ [HBASE-8779] - Add mutateRow method support to Thrift2
+ [HBASE-8946] - Add a new function to Thrift 2 to open scanner, get results and close scanner
+ [HBASE-8947] - Thrift 2 : Replace "bool writeToWAL" with "TDurability durability"
+ [HBASE-8948] - Bound table pool size in Thrift 2 server
+
+Bug
+
+ [HBASE-6826] - [WINDOWS] TestFromClientSide failures
+ [HBASE-8067] - TestHFileArchiving.testArchiveOnTableDelete sometimes fails
+ [HBASE-8670] - [0.94] Backport HBASE-8449,HBASE-8204 and HBASE-8699 to 0.94 (Refactor recoverLease retries and pauses)
+ [HBASE-8698] - potential thread creation in MetaScanner.metaScan
+ [HBASE-8935] - IntegrationTestBigLinkedList fails under load on 0.94 due to some scan issues - add logging
+ [HBASE-8949] - hbase.mapreduce.hfileoutputformat.blocksize should be configured with the blocksize of a table
+ [HBASE-9026] - RestartRsHoldingRoot action in org.apache.hadoop.hbase.util.ChaosMonkey restarts the server holding .META. instead of -ROOT-
+ [HBASE-9032] - Result.getBytes() returns null if backed by KeyValue array
+ [HBASE-9048] - HCM throws NullPointerException under load
+ [HBASE-9050] - HBaseClient#call could hang
+ [HBASE-9060] - ExportSnapshot job fails if target path contains percentage character
+ [HBASE-9079] - FilterList getNextKeyHint skips rows that should be included in the results
+ [HBASE-9080] - Retain assignment should be used when re-enabling table(s)
+ [HBASE-9085] - Integration Tests fails because of bug in teardown phase where the cluster state is not being restored properly.
+ [HBASE-9087] - Handlers being blocked during reads
+ [HBASE-9097] - Set HBASE_CLASSPATH before rest of the classpath
+ [HBASE-9115] - HTableInterface.append operation may overwrite values
+ [HBASE-9120] - ClassFinder logs errors that are not
+ [HBASE-9146] - TestHTablePool hangs when run as part of runMediumTests profile
+ [HBASE-9158] - Serious bug in cyclic replication
+ [HBASE-9189] - IncreasingToUpperBoundRegionSplitPolicy.shouldSplit() should check all the stores before returning.
+ [HBASE-9200] - HFilePrettyPrinter finds incorrect largest row
+
+Improvement
+
+ [HBASE-6580] - Deprecate HTablePool in favor of HConnection.getTable(...)
+ [HBASE-8995] - Add hadoop-1.2 profile
+ [HBASE-9019] - Port HBASE-8690: Reduce unnecessary getFileStatus hdfs calls in TTL hfile and hlog cleaners to 0.94
+ [HBASE-9029] - Backport HBASE-8706 Some improvement in snapshot to 0.94
+ [HBASE-9132] - Use table dir modtime to avoid scanning table dir to check cached table descriptor in 0.94
+ [HBASE-9139] - Independent timeout configuration for rpc channel between cluster nodes
+
+New Feature
+
+ [HBASE-7826] - Improve Hbase Thrift v1 to return results in sorted order
+ [HBASE-8663] - a HBase Shell command to list the tables replicated from current cluster
+
+Test
+
+ [HBASE-8816] - Add support of loading multiple tables into LoadTestTool
+ [HBASE-9075] - [0.94] Backport HBASE-5760 Unit tests should write only under /target to 0.94
+ [HBASE-9090] - cleanup snapshot tests setup/teardown code
+ [HBASE-9106] - Do not fail TestAcidGuarantees for exceptions on table flush
+ [HBASE-9107] - [0.94] Backport HBASE-6950 TestAcidGuarantees system test now flushes too aggressively to 0.94
+
+
+Release 0.94.10 - 7/19/2013
+Sub-task
+
+ [HBASE-8774] - Add BatchSize and Filter to Thrift2
+ [HBASE-8819] - Port HBASE-5428 to Thrift 2
+ [HBASE-8826] - Ensure HBASE-8695 is covered in Thrift 2
+ [HBASE-8832] - Ensure HBASE-4658 is supported by Thrift 2
+ [HBASE-8876] - Addendum to HBASE-8774 Add BatchSize and Filter to Thrift2 - Add BatchSize Test
+ [HBASE-8938] - Thrift2 does not close scanner instance
+
+Bug
+
+ [HBASE-8432] - a table with unbalanced regions will balance indefinitely with the 'org.apache.hadoop.hbase.master.DefaultLoadBalancer'
+ [HBASE-8678] - Wrongly delete cells in some cases which can not be deleted
+ [HBASE-8695] - The HBase thrift service ignores XML configuration
+ [HBASE-8776] - tweak retry settings some more (on trunk and 0.94)
+ [HBASE-8782] - Thrift2 can not parse values when using framed transport
+ [HBASE-8794] - DependentColumnFilter.toString() throws NullPointerException
+ [HBASE-8811] - REST service ignores misspelled "check=" parameter, causing unexpected mutations
+ [HBASE-8814] - Possible NPE in split if a region has empty store files.
+ [HBASE-8858] - Backport hbase-6979 "recovered.edits file should not break distributed log splitting"
+ [HBASE-8910] - HMaster.abortNow shouldn't try to become a master again if it was stopped
+ [HBASE-8967] - Duplicate call to snapshotManager.stop() in HRegionServer
+ [HBASE-8968] - Avoid call to zk in HRegionServer#getMaster() to log the master address which was already read from the zk.
+ [HBASE-8971] - Bump 0.94 version to 0.94.10-SNAPSHOT
+ [HBASE-8988] - Reuse the thread pool in the shell to not run out of threads
+
+Improvement
+
+ [HBASE-8599] - HLogs in ZK are not cleaned up when replication lag is minimal
+ [HBASE-8767] - Backport hbase-8001 and hbase-8012, avoid lazy seek
+ [HBASE-8806] - Row locks are acquired repeatedly in HRegion.doMiniBatchMutation for duplicate rows.
+ [HBASE-8809] - Include deletes in the scan (setRaw) method does not respect the time range or the filter
+ [HBASE-8847] - Filter.transform() always applies unconditionally, even when combined in a FilterList
+ [HBASE-8908] - Backport HBASE-8882 and HBASE-8904 (An Integration Test to Test MTTR) to 0.94
+ [HBASE-8921] - [thrift2] Add GenericOptionsParser to Thrift 2 server
+ [HBASE-8945] - Backport to 0.94: HBASE-7952 Remove update() and Improve ExplicitColumnTracker performance.
+
+Task
+
+ [HBASE-8829] - Improve failed TestMetaScanner assert message so can see where/why failure
+
+Test
+
+ [HBASE-7770] - minor integration test framework fixes
+ [HBASE-8885] - Fix and reenable TestGet failing#testDynamicFilter
+ [HBASE-8914] - [0.94] TestRegionServerCoprocessorExceptionWithAbort is flaky
+ [HBASE-8928] - Make ChaosMonkey & LoadTest tools extensible, to allow addition of more actions and policies.
+ [HBASE-8934] - Fix bunch of flaky tests
+ [HBASE-8969] - Backport HBASE-8535+HBASE-8586 TestHCM#testDeleteForZKConnLeak enhancement to 0.94
+
+
+Release 0.94.9 - 6/24/2013
+Sub-task
+
+ [HBASE-8453] - TestImportExport failing again due to configuration issues
+
+Bug
+
+ [HBASE-8494] - TestRemoteAdmin#testClusterStatus should not assume 'requests' does not change
+ [HBASE-8522] - Archived hfiles and old hlogs may be deleted immediately by HFileCleaner, LogCleaner in HMaster
+ [HBASE-8555] - FilterList correctness may be affected by random ordering of sub-filter(list)
+ [HBASE-8590] - [0.94] BlockingMetaScannerVisitor should check for parent meta entry while waiting for split daughter
+ [HBASE-8639] - Poor performance of htable#getscanner in multithreaded environment due to DNS.getDefaultHost() being called in ScannerCallable#prepare()
+ [HBASE-8640] - ServerName in master may not initialize with the configured ipc address of hbase.master.ipc.address
+ [HBASE-8655] - Backport to 94 - HBASE-8346(Prefetching .META. rows in case only when useCache is set to true)
+ [HBASE-8656] - Rpc call may not be notified in SecureClient
+ [HBASE-8671] - Per-region WAL breaks CP backwards compatibility in 0.94 for non-enabled case
+ [HBASE-8684] - Table Coprocessor can't access external HTable by default
+ [HBASE-8700] - IntegrationTestBigLinkedList can fail due to random number collision
+ [HBASE-8724] - [0.94] ExportSnapshot should not use hbase.tmp.dir as a staging dir on hdfs
+ [HBASE-8742] - HTableDescriptor Properties not preserved when cloning
+ [HBASE-8743] - upgrade hadoop-23 version to 0.23.7
+ [HBASE-8749] - Potential race condition between FSUtils.renameAndSetModifyTime() and HFile/LogCleaner
+ [HBASE-8762] - Performance/operational penalty when calling HTable.get with a list of one Get
+ [HBASE-8783] - RSSnapshotManager.ZKProcedureMemberRpcs may be initialized with the wrong server name
+
+Improvement
+
+ [HBASE-5083] - Backup HMaster should have http infoport open with link to the active master
+ [HBASE-8609] - Make the CopyTable support startRow, stopRow options
+ [HBASE-8636] - Backport KeyValue Codec to 0.94 (HBASE-7413)
+ [HBASE-8683] - Add major compaction support in CompactionTool
+ [HBASE-8702] - Make WALEditCodec pluggable
+
+New Feature
+
+ [HBASE-8504] - HTable.getRegionsInRange() should provide a non-cached API
+
+Task
+
+ [HBASE-8603] - Backport HBASE-6921 to 0.94
+
+
+Release 0.94.8 - 5/22/2013
+Sub-task
+
+ [HBASE-8381] - TestTableInputFormatScan on Hadoop 2 fails because YARN kills our applications
+ [HBASE-8399] - TestTableInputFormatScan2#testScanFromConfiguration fails on hadoop2 profile
+
+Bug
+
+ [HBASE-7122] - Proper warning message when opening a log file with no entries (idle cluster)
+ [HBASE-7210] - Backport HBASE-6059 to 0.94
+ [HBASE-7921] - TestHFileBlock.testGzipCompression should ignore the block checksum
+ [HBASE-8282] - User-triggered flushes do not allow compaction to get triggered even if compaction criteria are met
+ [HBASE-8327] - Consolidate class loaders
+ [HBASE-8354] - Backport HBASE-7878 'recoverFileLease does not check return value of recoverLease' to 0.94
+ [HBASE-8355] - BaseRegionObserver#pre(Compact|Flush|Store)ScannerOpen returns null
+ [HBASE-8377] - IntegrationTestBigLinkedList calculates wrap for linked list size incorrectly
+ [HBASE-8379] - bin/graceful_stop.sh does not return the balancer to original state
+ [HBASE-8385] - [SNAPSHOTS]: Restore fails to restore snapshot of a deleted table
+ [HBASE-8389] - HBASE-8354 forces Namenode into loop with lease recovery requests
+ [HBASE-8413] - Snapshot verify region will always fail if the HFile has been archived
+ [HBASE-8451] - MetricsMBeanBase has concurrency issues in init
+ [HBASE-8455] - Update ExportSnapshot to reflect changes in HBASE-7419
+ [HBASE-8464] - FastDiffEncoder - valueOffset calculation is incorrect
+ [HBASE-8483] - HConnectionManager can leak ZooKeeper connections when using deleteStaleConnection
+ [HBASE-8493] - Backport HBASE-8422, 'Master won't go down', to 0.94
+ [HBASE-8503] - Backport hbase-8483 "HConnectionManager can leak ZooKeeper connections when using deleteStaleConnection" to 0.94
+ [HBASE-8505] - References to split daughters should not be deleted separately from parent META entry
+ [HBASE-8509] - ZKUtil#createWithParents won't set data during znode creation when parent folder doesn't exist
+ [HBASE-8513] - [0.94] Fix class files with CRLF endings
+ [HBASE-8516] - FSUtils.create() fail with ViewFS
+ [HBASE-8525] - Use sleep multiplier when choosing sinks in ReplicationSource
+ [HBASE-8530] - Refine error message from ExportSnapshot when there is leftover snapshot in target cluster
+ [HBASE-8538] - HBaseAdmin#isTableEnabled() should check table existence before checking zk state.
+ [HBASE-8539] - Double (or triple ...) ZooKeeper listeners of the same type when Master recovers from ZK SessionExpiredException
+ [HBASE-8540] - SnapshotFileCache logs too many times if snapshot dir doesn't exists
+ [HBASE-8547] - Fix java.lang.RuntimeException: Cached an already cached block
+ [HBASE-8550] - 0.94 ChaosMonkey grep for master is too broad
+ [HBASE-8563] - Double count of read requests for Gets
+ [HBASE-8588] - [Documentation]: Add information about adding REST and Thrift API kerberos principals to HBase ACL table
+
+Improvement
+
+ [HBASE-5930] - Limits the amount of time an edit can live in the memstore.
+ [HBASE-6870] - HTable#coprocessorExec always scan the whole table
+ [HBASE-8345] - Add all available resources in o.a.h.h.rest.RootResource and VersionResource to o.a.h.h.rest.client.RemoteAdmin
+ [HBASE-8350] - enable ChaosMonkey to run commands as different users
+ [HBASE-8367] - LoadIncrementalHFiles does not return an error code nor throw Exception when failures occur due to timeouts
+ [HBASE-8383] - Support lib/*jar inside coprocessor jar
+ [HBASE-8405] - Add more custom options to how ClusterManager runs commands
+ [HBASE-8446] - Allow parallel snapshot of different tables
+
+New Feature
+
+ [HBASE-7965] - Port table locking to 0.94 (HBASE-7305, HBASE-7546, HBASE-7933)
+ [HBASE-8415] - DisabledRegionSplitPolicy
+
+Task
+
+ [HBASE-8574] - Add how to rename a table in the docbook
+
+Test
+
+ [HBASE-8508] - improve unit-test coverage of package org.apache.hadoop.hbase.metrics.file
+
+
+Release 0.94.7 - 4/24/2013
+Sub-task
+
+ [HBASE-7615] - Add metrics for snapshots
+ [HBASE-7801] - Allow a deferred sync option per Mutation.
+ [HBASE-8210] - Backport the LoadTest portions of HBASE-7383
+ [HBASE-8316] - JoinedHeap for non essential column families should reseek instead of seek
+
+Bug
+
+ [HBASE-7401] - Remove warning message about running 'hbase migrate'
+ [HBASE-7658] - grant with an empty string as permission should throw an exception
+ [HBASE-7817] - Suggested JDWP debug options in hbase-env.sh are wrong
+ [HBASE-7824] - Improve master start up time when there is log splitting work
+ [HBASE-7925] - Back port HBASE-6881 into 0.94
+ [HBASE-7961] - truncate on disabled table should throw TableNotEnabledException.
+ [HBASE-8014] - Backport HBASE-6915 to 0.94.
+ [HBASE-8030] - znode path of online region servers is hard coded in rolling_restart.sh
+ [HBASE-8044] - split/flush/compact/major_compact from hbase shell does not work for region key with \x format
+ [HBASE-8081] - Backport HBASE-7213 (separate hlog for meta tables) to 0.94
+ [HBASE-8092] - bulk assignment in 0.94 doesn't handle ZK errors very well
+ [HBASE-8096] - [replication] NPE while replicating a log that is acquiring a new block from HDFS
+ [HBASE-8118] - TestTablePermission depends on the execution order
+ [HBASE-8125] - HBASE-7435 breaks BuiltInGzipDecompressor on Hadoop < 1.0.x
+ [HBASE-8127] - Region of a disabling or disabled table could be stuck in transition state when RS dies during Master initialization
+ [HBASE-8128] - HTable#put improvements
+ [HBASE-8131] - Create table handler needs to handle failure cases.
+ [HBASE-8142] - Sporadic TestZKProcedureControllers failures on trunk
+ [HBASE-8146] - IntegrationTestBigLinkedList does not work on distributed setup
+ [HBASE-8150] - server should not produce RAITE for already-opening region in 0.94 (because master retry logic handles this case poorly)
+ [HBASE-8151] - Decode memstoreTS in HFileReaderV2 only when necessary
+ [HBASE-8158] - Backport HBASE-8140 "TableMapReduceUtils#addDependencyJar fails when nested inside another MR job"
+ [HBASE-8160] - HMaster#move doesn't check if master initialized
+ [HBASE-8166] - Avoid writing the memstoreTS into HFiles when possible
+ [HBASE-8169] - TestMasterFailover#testMasterFailoverWithMockedRITOnDeadRS may fail due to regions randomly assigned to a RS
+ [HBASE-8170] - HbaseAdmin.createTable cannot handle creating three regions
+ [HBASE-8176] - Backport HBASE-5335 "Dynamic Schema Configurations" to 0.94
+ [HBASE-8179] - JSON formatting for cluster status is sort of broken
+ [HBASE-8188] - Avoid unnecessary row compare in StoreScanner
+ [HBASE-8192] - Logic error causes infinite loop in HRegion.bulkLoadHFiles(List)
+ [HBASE-8207] - Replication could have data loss when machine name contains hyphen "-"
+ [HBASE-8208] - In some situations data is not replicated to slaves when deferredLogSync is enabled
+ [HBASE-8211] - Support for NN HA for 0.94
+ [HBASE-8212] - Introduce a new separator instead of hyphen('-') for renaming recovered queues' znodes
+ [HBASE-8213] - global authorization may lose efficacy
+ [HBASE-8215] - Removing existing .regioninfo in writeRegioninfoOnFilesystem
+ [HBASE-8222] - User class should implement equals() and hashCode()
+ [HBASE-8225] - [replication] minor code bug when registering ReplicationLogCleaner
+ [HBASE-8226] - HBaseTestingUtility#waitUntilAllRegionsAssigned won't return if it counts "too many" regions
+ [HBASE-8229] - Replication code logs like crazy if a target table cannot be found.
+ [HBASE-8230] - Possible NPE on regionserver abort if replication service has not been started
+ [HBASE-8231] - delete tests in table_tests.rb (TestShell) always run on an empty table.
+ [HBASE-8232] - TestAccessController occasionally fails with IndexOutOfBoundsException
+ [HBASE-8246] - Backport HBASE-6318 to 0.94 where SplitLogWorker exits due to ConcurrentModificationException
+ [HBASE-8259] - Snapshot backport in 0.94.6 breaks rolling restarts
+ [HBASE-8266] - Master cannot start if TableNotFoundException is thrown while partial table recovery
+ [HBASE-8270] - Backport HBASE-8097 'MetaServerShutdownHandler may potentially keep bumping up DeadServer.numProcessing' to 0.94
+ [HBASE-8274] - Backport to 94: HBASE-7488 Implement HConnectionManager.locateRegions which is currently returning null
+ [HBASE-8276] - Backport hbase-6738 to 0.94 "Too aggressive task resubmission from the distributed log manager"
+ [HBASE-8285] - HBaseClient never recovers for single HTable.get() calls with no retries when regions move
+ [HBASE-8288] - HBaseFileSystem: Refactoring and correct semantics for createPath methods
+ [HBASE-8303] - Increase the test timeout to 60s when they are less than 20s
+ [HBASE-8313] - Add Bloom filter testing for HFileOutputFormat
+ [HBASE-8326] - mapreduce.TestTableInputFormatScan times out frequently
+ [HBASE-8352] - Rename '.snapshot' directory
+ [HBASE-8427] - Apache Rat is incorrectly excluding test source files
+
+Improvement
+
+ [HBASE-7410] - [snapshots] add snapshot/clone/restore/export docs to ref guide
+ [HBASE-7599] - Port HBASE-6066 (low hanging read path improvements) to 0.94
+ [HBASE-8148] - Allow IPC to bind on a specific address
+ [HBASE-8152] - Avoid creating empty reference file when splitkey is outside the key range of a store file
+ [HBASE-8174] - Backport HBASE-8161(setting blocking file count on table level doesn't work) to 0.94
+ [HBASE-8198] - Backport HBASE-8063(Filter HFiles based on first/last key) into 0.94
+ [HBASE-8199] - Eliminate exception for ExportSnapshot against the null table snapshot (with no data in)
+ [HBASE-8209] - Improve LoadTest extensibility
+
+New Feature
+
+ [HBASE-1936] - ClassLoader that loads from hdfs; useful adding filters to classpath without having to restart services
+ [HBASE-7415] - [snapshots] Add task information to snapshot operation
+
+Task
+
+ [HBASE-7929] - Reapply hbase-7507 "Make memstore flush be able to retry after exception" to 0.94 branch.
+
+Test
+
+ [HBASE-8106] - Test to check replication log znodes move is done correctly
+ [HBASE-8260] - create generic integration test for trunk and 94 that is more deterministic, can be run for longer and is less aggressive
+
+
+Release 0.94.6.1 - 4/13/2013
+Bug
+
+ [HBASE-8259] - Snapshot backport in 0.94.6 breaks rolling restarts
+
+
+Release 0.94.6 - 3/14/2013
+Sub-task
+
+ [HBASE-7944] - Replication leaks file reader resource & not reset currentNbOperations
+
+Bug
+
+ [HBASE-6132] - ColumnCountGetFilter & PageFilter not working with FilterList
+ [HBASE-6347] - -ROOT- and .META. are stale in table.jsp if they moved
+ [HBASE-6748] - Endless recursive of deleteNode happened in SplitLogManager#DeleteAsyncCallback
+ [HBASE-7111] - hbase zkcli will not start if the zookeeper server chosen to connect to is unavailable
+ [HBASE-7153] - print gc option in hbase-env.sh affects hbase zkcli
+ [HBASE-7507] - Make memstore flush be able to retry after exception
+ [HBASE-7521] - fix HBASE-6060 (regions stuck in opening state) in 0.94
+ [HBASE-7624] - Backport HBASE-5359 and HBASE-7596 to 0.94
+ [HBASE-7671] - Flushing memstore again after last failure could cause data loss
+ [HBASE-7700] - TestColumnSeeking is mathematically bound to fail
+ [HBASE-7723] - Remove NameNode URI from ZK splitlogs
+ [HBASE-7725] - Add ability to create custom compaction request
+ [HBASE-7761] - MemStore.USEMSLAB_DEFAULT is false, hbase-default.xml says it's true
+ [HBASE-7763] - Compactions not sorting based on size anymore.
+ [HBASE-7768] - zkcluster in local mode not seeing configurations in hbase-{site|default}.xml
+ [HBASE-7777] - HBCK check for lingering split parents should check for child regions
+ [HBASE-7813] - Bug in BulkDeleteEndpoint kills entire rows on COLUMN/VERSION Deletes
+ [HBASE-7814] - Port HBASE-6963 'unable to run hbck on a secure cluster' to 0.94
+ [HBASE-7829] - zookeeper kerberos conf keytab and principal parameters interchanged
+ [HBASE-7832] - Use User.getShortName() in FSUtils
+ [HBASE-7833] - 0.94 does not compile with Hadoop-0.20.205 and 0.22.0
+ [HBASE-7851] - Include the guava classes as a dependency for jobs using mapreduce.TableMapReduceUtil
+ [HBASE-7866] - TestSplitTransactionOnCluster.testSplitBeforeSettingSplittingInZK failed 3 times in a row
+ [HBASE-7867] - setPreallocSize is different with COMMENT in setupTestEnv in MiniZooKeeperCluster.java
+ [HBASE-7869] - Provide way to not start LogSyncer thread
+ [HBASE-7876] - Got exception when manually triggering a split on an empty region
+ [HBASE-7883] - Update memstore size when removing the entries in append operation
+ [HBASE-7884] - ByteBloomFilter's performance can be improved by avoiding multiplication when generating hash
+ [HBASE-7913] - Secure Rest server should login before getting an instance of Rest servlet
+ [HBASE-7914] - Port the fix of HBASE-6748 into 0.94 branch
+ [HBASE-7915] - Secure ThriftServer needs to login before calling HBaseHandler
+ [HBASE-7916] - HMaster uses wrong InetSocketAddress parameter to throw exception
+ [HBASE-7919] - Wrong key is used in ServerManager#getServerConnection() to retrieve from Map serverConnections
+ [HBASE-7920] - Move isFamilyEssential(byte[] name) out of Filter interface in 0.94
+ [HBASE-7945] - Remove flaky TestCatalogTrackerOnCluster
+ [HBASE-7986] - [REST] Make HTablePool size configurable
+ [HBASE-7991] - Backport HBASE-6479 'HFileReaderV1 caching the same parent META block could cause server abort when splitting' to 0.94
+ [HBASE-8007] - Adopt TestLoadAndVerify from BigTop
+ [HBASE-8019] - Port HBASE-7779 '[snapshot 130201 merge] Fix TestMultiParallel' to 0.94
+ [HBASE-8025] - zkcli fails when SERVER_GC_OPTS is enabled
+ [HBASE-8040] - Race condition in AM after HBASE-7521 (only 0.94)
+ [HBASE-8055] - Null check missing in StoreFile.Reader.getMaxTimestamp()
+ [HBASE-8061] - Missing test from TestFlushSnapshotFromClient in 0.94
+ [HBASE-8069] - TestHLog is dependent on the execution order
+ [HBASE-8085] - Backport the fix for Bytes.toStringBinary() into 94 (HBASE-6991)
+ [HBASE-8099] - ReplicationZookeeper.copyQueuesFromRSUsingMulti should not return any queues if it failed to execute.
+ [HBASE-8103] - Fix pom so 0.94 can generate site reports
+
+Improvement
+
+ [HBASE-7818] - add region level metrics readReqeustCount and writeRequestCount
+ [HBASE-7827] - Improve the speed of HBase Thrift batch mutation for deletes
+ [HBASE-8031] - Adopt goraci as an Integration test
+
+New Feature
+
+ [HBASE-4210] - Allow coprocessor to interact with batches per region sent from a client
+ [HBASE-7360] - Snapshot 0.94 Backport
+
+Task
+
+ [HBASE-8088] - Versioning site: part one, put stake in the ground for 0.94 by copying current versions of book and site
+ [HBASE-8090] - Versioning site; part two, publish 0.94 site and add link from main site
+
+
+Release 0.94.5 - 2/7/2013
+Sub-task
+
+ [HBASE-2611] - Handle RS that fails while processing the failure of another one
+ [HBASE-7626] - Backport portions of HBASE-7460 to 0.94
+ [HBASE-7687] - TestCatalogTracker.testServerNotRunningIOException fails occasionally
+ [HBASE-7738] - REST server should publish metrics that are available via HTTP
+
+Bug
+
+ [HBASE-5458] - Thread safety issues with Compression.Algorithm.GZ and CompressionTest
+ [HBASE-6513] - Test errors when building on MacOS
+ [HBASE-6824] - Introduce ${hbase.local.dir} and save coprocessor jars there
+ [HBASE-7034] - Bad version, failed OPENING to OPENED but master thinks it is open anyways
+ [HBASE-7293] - [replication] Remove dead sinks from ReplicationSource.currentPeers and pick new ones
+ [HBASE-7423] - HFileArchiver should not use the configuration from the Filesystem
+ [HBASE-7468] - TestSplitTransactionOnCluster hangs frequently
+ [HBASE-7476] - HBase shell count command doesn't escape binary output
+ [HBASE-7497] - TestDistributedLogSplitting.testDelayedDeleteOnFailure times out occasionally
+ [HBASE-7498] - Make REST server thread pool size configurable
+ [HBASE-7499] - TestScannerTimeout timeout is too aggressive.
+ [HBASE-7502] - TestScannerTimeout fails on snapshot branch
+ [HBASE-7504] - -ROOT- may be offline forever after FullGC of RS
+ [HBASE-7505] - Server will hang when stopping cluster, caused by waiting for split threads
+ [HBASE-7506] - Judgment of carrying ROOT/META will become wrong when expiring server
+ [HBASE-7513] - HDFSBlocksDistribution shouldn't send NPEs when something goes wrong
+ [HBASE-7515] - Store.loadStoreFiles should close opened files if there's an exception
+ [HBASE-7524] - hbase-policy.xml is improperly set thus all rules in it can be by-passed
+ [HBASE-7530] - [replication] Work around HDFS-4380 else we get NPEs
+ [HBASE-7531] - [replication] NPE in SequenceFileLogReader because ReplicationSource doesn't nullify the reader
+ [HBASE-7534] - [replication] TestReplication.queueFailover can fail because HBaseTestingUtility.createMultiRegions is dangerous
+ [HBASE-7545] - [replication] Break out TestReplication into manageable classes
+ [HBASE-7549] - Make HTableInterface#batch() javadoc proper
+ [HBASE-7550] - Synchronization problem in AssignmentManager
+ [HBASE-7551] - nodeChildrenChange event may happen after the transition to RS_ZK_REGION_SPLITTING in SplitTransaction causing the SPLIT event to be missed in the master side.
+ [HBASE-7562] - ZKUtil: missing "else condition" in multi processing
+ [HBASE-7575] - FSUtils#getTableStoreFilePathMap should also ignore non-table folders
+ [HBASE-7578] - TestCatalogTracker hangs occasionally
+ [HBASE-7581] - TestAccessController depends on the execution order
+ [HBASE-7584] - Improve TestAccessController.testAppend
+ [HBASE-7587] - Fix two findbugs warning in RowResource
+ [HBASE-7592] - HConnectionManager.getHTableDescriptor() compares too much
+ [HBASE-7602] - TestFromClientSide.testPoolBehavior is incorrect
+ [HBASE-7617] - TestHRegionOnCluster.testDataCorrectnessReplayingRecoveredEdits still fails occasionally.
+ [HBASE-7628] - Port HBASE-6509 fast-forwarding FuzzyRowFilter to 0.94
+ [HBASE-7643] - HFileArchiver.resolveAndArchive() race condition may lead to snapshot data loss
+ [HBASE-7644] - Port HBASE-4802 'Disable show table metrics in bulk loader' to 0.94
+ [HBASE-7646] - Make forkedProcessTimeoutInSeconds configurable
+ [HBASE-7647] - 0.94 hfiles v2.1 are not backwards compatible with HFile v2.0
+ [HBASE-7648] - TestAcidGuarantees.testMixedAtomicity hangs sometimes
+ [HBASE-7654] - Add List getCoprocessors() to HTableDescriptor
+ [HBASE-7669] - ROOT region wouldn't be handled by PRI-IPC-Handler
+ [HBASE-7681] - Address some recent random test failures
+ [HBASE-7684] - NullPointerException in SecureClient when Call is cleaned up due to RPC timeout
+ [HBASE-7685] - Closing socket connection can't be removed from SecureClient
+ [HBASE-7693] - Hostname returned by TableInputFormatBase.reverseDNS contains trailing period
+ [HBASE-7694] - Secure HBase should use replication call queue
+ [HBASE-7698] - race between RS shutdown thread and openregionhandler causes region to get stuck
+ [HBASE-7702] - Adding filtering to Import jobs
+ [HBASE-7715] - FSUtils#waitOnSafeMode can incorrectly loop on standby NN
+ [HBASE-7717] - Wait until regions are assigned in TestSplitTransactionOnCluster
+ [HBASE-7728] - deadlock occurs between hlog roller and hlog syncer
+ [HBASE-7729] - TestCatalogTrackerOnCluster.testbadOriginalRootLocation fails occasionally
+ [HBASE-7730] - HBaseAdmin#synchronousBalanceSwitch is not compatible with 0.92
+ [HBASE-7731] - Append/Increment methods in HRegion don't check whether the table is readonly or not
+ [HBASE-7740] - Recheck matching row for joined scanners
+ [HBASE-7771] - Secure HBase Client in MR job causes tasks to wait forever
+ [HBASE-7772] - clusterId is not set in conf properly if only TableMapReduceUtil.initCredentials() is called
+ [HBASE-7776] - Use ErrorReporter/Log instead of System.out in hbck
+ [HBASE-7785] - rolling-restart.sh script unable to check expiration of master znode
+ [HBASE-7793] - Port HBASE-5564 Bulkload is discarding duplicate records to 0.94
+
+Improvement
+
+ [HBASE-3996] - Support multiple tables and scanners as input to the mapper in map/reduce jobs
+ [HBASE-5416] - Improve performance of scans with some kind of filters.
+ [HBASE-5498] - Secure Bulk Load
+ [HBASE-5664] - CP hooks in Scan flow for fast forward when filter filters out a row
+ [HBASE-7441] - Make ClusterManager in IntegrationTestingUtility pluggable
+ [HBASE-7540] - Make znode dump to print a dump of replication znodes
+ [HBASE-7561] - Display the total number of regions for a given table on the master webUI
+ [HBASE-7757] - Add web UI to REST server and Thrift server
+
+New Feature
+
+ [HBASE-6669] - Add BigDecimalColumnInterpreter for doing aggregations using AggregationClient
+ [HBASE-7748] - Add DelimitedKeyPrefixRegionSplitPolicy
+
+Wish
+
+ [HBASE-7705] - Make the method getCurrentPoolSize of HTablePool public
+
+
+Release 0.94.4 - 1/2/2013
+Sub-task
+
+ [HBASE-3776] - Add Bloom Filter Support to HFileOutputFormat
+ [HBASE-6206] - Large tests fail with jdk1.7
+ [HBASE-7009] - Port HBaseCluster interface/tests to 0.94
+ [HBASE-7042] - Master Coprocessor Endpoint
+ [HBASE-7282] - Backport Compaction Tool to 0.94
+ [HBASE-7331] - Add access control for region open and close, row locking, and stopping the regionserver
+ [HBASE-7336] - HFileBlock.readAtOffset does not work well with multiple threads
+ [HBASE-7371] - Blocksize in TestHFileBlock is unintentionally small
+ [HBASE-7399] - Health check chore for HMaster
+ [HBASE-7406] - Example health checker script
+ [HBASE-7431] - TestSplitTransactionOnCluster tests still flaky
+ [HBASE-7438] - TestSplitTransactionOnCluster has too many infinite loops
+
+Bug
+
+ [HBASE-6175] - TestFSUtils flaky on hdfs getFileStatus method
+ [HBASE-6317] - Master clean start up and Partially enabled tables make region assignment inconsistent.
+ [HBASE-6327] - HLog can be null when create table
+ [HBASE-6423] - Writes should not block reads on blocking updates to memstores
+ [HBASE-7091] - support custom GC options in hbase-env.sh
+ [HBASE-7158] - Allow CopyTable to identify the source cluster (for replication scenarios)
+ [HBASE-7165] - TestSplitLogManager.testUnassignedTimeout is flaky
+ [HBASE-7166] - TestSplitTransactionOnCluster tests are flaky
+ [HBASE-7172] - TestSplitLogManager.testVanishingTaskZNode() fails when run individually and is flaky
+ [HBASE-7177] - TestZooKeeperScanPolicyObserver.testScanPolicyObserver is flaky
+ [HBASE-7180] - RegionScannerImpl.next() is inefficient.
+ [HBASE-7205] - Coprocessor classloader is replicated for all regions in the HRegionServer
+ [HBASE-7214] - CleanerChore logs too much, so much so it obscures all else that is going on
+ [HBASE-7230] - port HBASE-7109 integration tests on cluster are not getting picked up from distribution to 0.94
+ [HBASE-7235] - TestMasterObserver is flaky
+ [HBASE-7251] - Avoid flood logs during client disconnect during batch get operation
+ [HBASE-7252] - TestSizeBasedThrottler fails occasionally
+ [HBASE-7259] - Deadlock in HBaseClient when KeeperException occurred
+ [HBASE-7260] - Upgrade hadoop 1 dependency to hadoop 1.1.1
+ [HBASE-7273] - Upgrade zookeeper dependency to 3.4.5 for 0.94
+ [HBASE-7279] - Avoid copying the rowkey in RegionScanner, StoreScanner, and ScanQueryMatcher
+ [HBASE-7300] - HbckTestingUtil needs to keep a static executor to lower the number of threads used
+ [HBASE-7301] - Force ipv4 for unit tests
+ [HBASE-7307] - MetaReader.tableExists should not return false if the specified table regions has been split
+ [HBASE-7338] - Fix flaky condition for org.apache.hadoop.hbase.TestRegionRebalancing.testRebalanceOnRegionServerNumberChange
+ [HBASE-7342] - Split operation without split key incorrectly finds the middle key in off-by-one error
+ [HBASE-7343] - Fix flaky condition for TestDrainingServer
+ [HBASE-7357] - HBaseClient and HBaseServer should use hbase.security.authentication when negotiating authentication
+ [HBASE-7376] - Acquiring readLock does not apply timeout in HRegion#flushcache
+ [HBASE-7398] - [0.94 UNIT TESTS] TestAssignmentManager fails frequently on CentOS 5
+ [HBASE-7412] - Fix how HTableDescriptor handles default max file size and flush size
+ [HBASE-7417] - TestReplication is flaky
+ [HBASE-7421] - TestHFileCleaner->testHFileCleaning has an aggressive timeout
+ [HBASE-7422] - MasterFS doesn't set configuration for internal FileSystem
+ [HBASE-7432] - TestHBaseFsck prevents testsuite from finishing
+ [HBASE-7435] - BuiltInGzipDecompressor is only released during full GC
+ [HBASE-7440] - ReplicationZookeeper#addPeer is racy
+ [HBASE-7442] - HBase remote CopyTable not working when security enabled
+ [HBASE-7455] - Increase timeouts in TestReplication and TestSplitLogWorker
+ [HBASE-7464] - [REST] Sending HTML for errors is unhelpful
+ [HBASE-7466] - Fix junit dependency typo in 0.94
+ [HBASE-7467] - CleanerChore checkAndDeleteDirectory not deleting empty directories
+ [HBASE-7483] - TestHRegionOnCluster and TestSplitTransactionOnCluster are racy with HBaseAdmin.move()
+ [HBASE-7485] - TestSplitLogManager is still flaky on windows
+
+Improvement
+
+ [HBASE-4791] - Allow Secure Zookeeper JAAS configuration to be programmatically set (rather than only by reading JAAS configuration file)
+ [HBASE-5616] - Make compaction code standalone
+ [HBASE-5693] - When creating a region, the master initializes it and creates a memstore within the master server
+ [HBASE-5778] - Fix HLog compression's incompatibilities
+ [HBASE-5888] - Clover profile in build
+ [HBASE-6585] - Audit log messages should contain info about the higher level operation being executed
+ [HBASE-6775] - Use ZK.multi when available for HBASE-6710 0.92/0.94 compatibility fix
+ [HBASE-7190] - Add an option to hbck to check only meta and assignment
+ [HBASE-7197] - Add multi get to RemoteHTable
+ [HBASE-7199] - hbck should check lingering reference hfile and have option to sideline them automatically
+ [HBASE-7204] - Make hbck ErrorReporter pluggable
+ [HBASE-7231] - port HBASE-7200 create integration test for balancing regions and killing region servers to 0.94
+ [HBASE-7249] - add test name filter to IntegrationTestsDriver
+ [HBASE-7328] - IntegrationTestRebalanceAndKillServersTargeted supersedes IntegrationTestRebalanceAndKillServers, remove
+ [HBASE-7351] - Periodic health check chore
+ [HBASE-7359] - [REST] 'accessToken' in RemoteHTable is vestigial
+ [HBASE-7374] - Expose master table operations for coprocessors by way of MasterServices
+ [HBASE-7377] - Clean up TestHBase7051
+ [HBASE-7381] - Lightweight data transfer for Class Result
+ [HBASE-7469] - [REST] Share a HBaseAdmin instance
+ [HBASE-7472] - [REST] Support MIME type application/protobuf
+
+Task
+
+ [HBASE-5258] - Move coprocessors set out of RegionLoad
+ [HBASE-7170] - [0.94 branch] Allow HConnectionImplementation to reconnect to master multiple times
+ [HBASE-7283] - Backport HBASE-6564 + HBASE-7202 to 0.94
+ [HBASE-7341] - Deprecate RowLocks in 0.94
+
+
+Release 0.94.3 - 11/12/2012
+Sub-task
+
+ [HBASE-4913] - Per-CF compaction Via the Shell
+ [HBASE-6305] - TestLocalHBaseCluster hangs with hadoop 2.0/0.23 builds.
+ [HBASE-6925] - Change socket write size from 8K to 64K for HBaseServer
+ [HBASE-6996] - HRegion.mutateRowsWithLocks should call checkResources/checkReadOnly
+ [HBASE-7076] - Add test that increment/append properly integrate with MVCC
+ [HBASE-7077] - Test for: CheckAndPut should properly read MVCC
+ [HBASE-7078] - Add a test that append is atomic
+
+Bug
+
+ [HBASE-6389] - Modify the conditions to ensure that Master waits for sufficient number of Region Servers before starting region assignments
+ [HBASE-6583] - Enhance Hbase load test tool to automatically create column families if not present
+ [HBASE-6665] - ROOT region should not be split even with META row as explicit split key
+ [HBASE-6700] - [replication] empty znodes created during queue failovers aren't deleted
+ [HBASE-6728] - [89-fb] prevent OOM possibility due to per connection responseQueue being unbounded
+ [HBASE-6733] - [0.92 UNIT TESTS] TestReplication.queueFailover occasionally fails [Part-2]
+ [HBASE-6796] - Backport HBASE-5547, Don't delete HFiles in backup mode.
+ [HBASE-6843] - loading lzo error when using coprocessor
+ [HBASE-6846] - BitComparator bug - ArrayIndexOutOfBoundsException
+ [HBASE-6904] - In the HBase shell, an error is thrown that states replication-related znodes already exist
+ [HBASE-6958] - TestAssignmentManager sometimes fails
+ [HBASE-6974] - Metric for blocked updates
+ [HBASE-6978] - Minor typo in ReplicationSource SocketTimeoutException error handling
+ [HBASE-7017] - Backport "[replication] The replication-executor should make sure the file that it is replicating is closed before declaring success on that file" to 0.94
+ [HBASE-7018] - Fix and Improve TableDescriptor caching for bulk assignment
+ [HBASE-7021] - Default to Hadoop 1.0.4 in 0.94 and add Hadoop 1.1 profile
+ [HBASE-7037] - ReplicationPeer logs at WARN level aborting server instead of at FATAL
+ [HBASE-7048] - Regionsplitter requires the hadoop config path to be in hbase classpath
+ [HBASE-7051] - CheckAndPut should properly read MVCC
+ [HBASE-7060] - Region load balancing by table does not handle the case where a table's region count is lower than the number of the RS in the cluster
+ [HBASE-7069] - HTable.batch does not have to be synchronized
+ [HBASE-7086] - Enhance ResourceChecker to log stack trace for potentially hanging threads
+ [HBASE-7095] - Cannot set 'lenAsVal' for KeyOnlyFilter from shell
+ [HBASE-7103] - Need to fail split if SPLIT znode is deleted even before the split is completed.
+ [HBASE-7143] - TestMetaMigrationRemovingHTD fails when used with Hadoop 0.23/2.x
+
+Improvement
+
+ [HBASE-5257] - Allow INCLUDE_AND_NEXT_COL in filters and use it in ColumnPaginationFilter
+ [HBASE-5314] - Gracefully rolling restart region servers in rolling-restart.sh
+ [HBASE-5898] - Consider double-checked locking for block cache lock
+ [HBASE-6852] - SchemaMetrics.updateOnCacheHit costs too much while full scanning a table with all of its fields
+ [HBASE-6942] - Endpoint implementation for bulk deletion of data
+ [HBASE-6951] - Allow the master info server to be started in a read only mode.
+ [HBASE-7073] - OperationMetrics needs to cache the value of hbase.metrics.exposeOperationTimes
+ [HBASE-7089] - Allow filter to be specified for Get from HBase shell
+ [HBASE-7097] - Log message in SecureServer.class uses wrong class name
+ [HBASE-7151] - Better log message for Per-CF compactions
+
+Task
+
+ [HBASE-6032] - Port HFileBlockIndex improvement from HBASE-5987
+ [HBASE-7016] - port HBASE-6518 'Bytes.toBytesBinary() incorrect trailing backslash escape' to 0.94
+ [HBASE-7020] - Backport HBASE-6336 Split point should not be equal to start row or end row
+ [HBASE-7038] - Port HBASE-5970 Improve the AssignmentManager#updateTimer and speed up handling opened event to 0.94
+ [HBASE-7040] - Port HBASE-5867 Improve Compaction Throttle Default to 0.94
+ [HBASE-7053] - port blockcache configurability (part of HBASE-6312, and HBASE-7033) to 0.94
+ [HBASE-7087] - Add to NOTICE.txt a note on jamon being MPL
+
+Test
+
+ [HBASE-5984] - TestLogRolling.testLogRollOnPipelineRestart failed with HADOOP 2.0.0
+ [HBASE-7142] - TestSplitLogManager#testDeadWorker may fail because of hard limit on the TimeoutMonitor's timeout period
+
+
+Release 0.94.2 - 10/08/2012
+Sub-task
+
+ [HBASE-6257] - Avoid unnecessary flush & compact on Meta in admin.rb.
+ [HBASE-6496] - Example ZK based scan policy
+ [HBASE-6792] - Remove interface audience annotations in 0.94/0.92 introduced by HBASE-6516
+
+Bug
+
+ [HBASE-4565] - Maven HBase build broken on cygwin with copynativelib.sh call.
+ [HBASE-5292] - getsize per-CF metric incorrectly counts compaction related reads as well
+ [HBASE-5549] - Master can fail if ZooKeeper session expires
+ [HBASE-5997] - Fix concerns raised in HBASE-5922 related to HalfStoreFileReader
+ [HBASE-6165] - Replication can overrun .META. scans on cluster re-start
+ [HBASE-6211] - Put latencies in jmx
+ [HBASE-6263] - Use default mode for HBase Thrift gateway if not specified
+ [HBASE-6268] - Can't enable a table on a 0.94 cluster from a 0.92 client
+ [HBASE-6299] - RS starting region open while failing ack to HMaster.sendRegionOpen() causes inconsistency in HMaster's region state and a series of successive problems
+ [HBASE-6321] - ReplicationSource dies reading the peer's id
+ [HBASE-6340] - HBase RPC should allow protocol extension with common interfaces.
+ [HBASE-6359] - KeyValue may return incorrect values after readFields()
+ [HBASE-6364] - Powering down the server host holding the .META. table causes HBase Client to take excessively long to recover and connect to reassigned .META. table
+ [HBASE-6378] - the javadoc of setEnabledTable may not describe it accurately
+ [HBASE-6432] - HRegionServer doesn't properly set clusterId in conf
+ [HBASE-6437] - Avoid admin.balance during master initialize
+ [HBASE-6438] - RegionAlreadyInTransitionException needs to give more info to avoid assignment inconsistencies
+ [HBASE-6447] - Common TestZooKeeper failures on jenkins: testMasterSessionExpired and testCreateSilentIsReallySilent
+ [HBASE-6450] - HBase startup should be with MALLOC_ARENA_MAX set
+ [HBASE-6460] - hbck "-repairHoles" usage inconsistent with "-fixHdfsOrphans"
+ [HBASE-6471] - Performance regression caused by HBASE-4054
+ [HBASE-6478] - TestClassLoading.testClassLoadingFromLibDirInJar occasionally fails
+ [HBASE-6488] - HBase wont run on IPv6 on OSes that use zone-indexes
+ [HBASE-6503] - HBase Shell Documentation For DROP Is Outdated
+ [HBASE-6504] - Adding GC details prevents HBase from starting in non-distributed mode
+ [HBASE-6512] - Incorrect OfflineMetaRepair log class name
+ [HBASE-6514] - unknown metrics type: org.apache.hadoop.hbase.metrics.histogram.MetricsHistogram
+ [HBASE-6516] - hbck cannot detect any IOException while ".tableinfo" file is missing
+ [HBASE-6520] - MSLab may cause Bytes.toLong to not work correctly for increment
+ [HBASE-6525] - bin/replication/copy_tables_desc.rb references non-existent class
+ [HBASE-6529] - With HFile v2, the region server will always perform an extra copy of source files
+ [HBASE-6537] - Race between balancer and disable table can lead to inconsistent cluster
+ [HBASE-6552] - TestAcidGuarantees system test should flush more aggressively
+ [HBASE-6561] - Gets/Puts with many columns send the RegionServer into an "endless" loop
+ [HBASE-6565] - Coprocessor exec result Map is not thread safe
+ [HBASE-6576] - HBaseAdmin.createTable should wait until the table is enabled
+ [HBASE-6579] - Unnecessary KV order check in StoreScanner
+ [HBASE-6587] - Region would be assigned twice in the case of all RS offline
+ [HBASE-6596] - Revert HBASE-5022; it undoes HBC.create
+ [HBASE-6602] - Region Server Dynamic Metrics can cause high cpu usage.
+ [HBASE-6603] - RegionMetricsStorage.incrNumericMetric is called too often
+ [HBASE-6608] - Fix for HBASE-6160, META entries from daughters can be deleted before parent entries, shouldn't compare HRegionInfo's
+ [HBASE-6615] - hbase.rs.evictblocksonclose seems to be ineffective
+ [HBASE-6616] - test failure in TestDelayedRpc#testTooManyDelayedRpcs
+ [HBASE-6621] - Reduce calls to Bytes.toInt
+ [HBASE-6623] - [replication] replication metrics value AgeOfLastShippedOp is not set correctly
+ [HBASE-6631] - TestHMasterRPCException in 0.92 failed twice on socket timeout
+ [HBASE-6632] - [0.92 UNIT TESTS] testCreateTableRPCTimeOut sets rpc timeout to 1500ms and leaves it (testHundredsOfTable fails w/ 1500ms timeout)
+ [HBASE-6638] - Move DaemonThreadFactory into Threads (0.94)
+ [HBASE-6641] - more message with DoNotRetryIOException in client
+ [HBASE-6647] - [performance regression] appendNoSync/HBASE-4528 doesn't take deferred log flush into account
+ [HBASE-6648] - [0.92 UNIT TESTS] TestMasterObserver.testRegionTransitionOperations fails occasionally
+ [HBASE-6649] - [0.92 UNIT TESTS] TestReplication.queueFailover occasionally fails [Part-1]
+ [HBASE-6662] - Region server incorrectly reports its own address as master's address
+ [HBASE-6663] - NPE race in HConnection if zookeeper is reset
+ [HBASE-6671] - Kerberos authenticated super user should be able to retrieve proxied delegation tokens
+ [HBASE-6679] - RegionServer aborts due to race between compaction and split
+ [HBASE-6685] - Thrift DemoClient.pl got NullPointerException
+ [HBASE-6686] - HFile Quarantine fails with missing dirs in hadoop 2.0
+ [HBASE-6688] - folder referred by thrift demo app instructions is outdated
+ [HBASE-6710] - 0.92/0.94 compatibility issues due to HBASE-5206
+ [HBASE-6711] - Avoid local results copy in StoreScanner
+ [HBASE-6713] - Stopping META/ROOT RS may take 50mins when some region is splitting
+ [HBASE-6714] - TestMultiSlaveReplication#testMultiSlaveReplication may fail
+ [HBASE-6734] - Code duplication in LoadIncrementalHFiles
+ [HBASE-6757] - Very inefficient behaviour of scan using FilterList
+ [HBASE-6762] - HBASE-6340 broke SecureRPCEngine
+ [HBASE-6769] - HRS.multi eats NoSuchColumnFamilyException since HBASE-5021
+ [HBASE-6784] - TestCoprocessorScanPolicy is sometimes flaky when run locally
+ [HBASE-6803] - script hbase should add JAVA_LIBRARY_PATH to LD_LIBRARY_PATH
+ [HBASE-6839] - Operations may be executed without holding rowLock
+ [HBASE-6842] - the jar used in coprocessor is not deleted in local which will exhaust the space of /tmp
+ [HBASE-6844] - upgrade 0.23 version dependency in 0.94
+ [HBASE-6847] - HBASE-6649 broke replication
+ [HBASE-6851] - Race condition in TableAuthManager.updateGlobalCache()
+ [HBASE-6853] - IllegalArgumentException is thrown when an empty region is split.
+ [HBASE-6854] - Deletion of SPLITTING node on split rollback should clear the region from RIT
+ [HBASE-6868] - Skip checksum is broke; are we double-checksumming by default?
+ [HBASE-6871] - HFileBlockIndex Write Error in HFile V2 due to incorrect split into intermediate index blocks
+ [HBASE-6888] - HBase scripts ignore any HBASE_OPTS set in the environment
+ [HBASE-6889] - Ignore source control files with apache-rat
+ [HBASE-6900] - RegionScanner.reseek() creates NPE when a flush or compaction happens before the reseek.
+ [HBASE-6901] - Store file compactSelection throws ArrayIndexOutOfBoundsException
+ [HBASE-6906] - TestHBaseFsck#testQuarantine* tests are flakey due to TableNotEnabledException
+ [HBASE-6912] - Filters are not properly applied in certain cases
+ [HBASE-6916] - HBA logs at info level errors that won't show in the shell
+ [HBASE-6920] - On timeout connecting to master, client can get stuck and never make progress
+ [HBASE-6927] - WrongFS using HRegionInfo.getTableDesc() and different fs for hbase.root and fs.defaultFS
+ [HBASE-6946] - JavaDoc missing from release tarballs
+
+Improvement
+
+ [HBASE-3271] - Allow .META. table to be exported
+ [HBASE-5582] - "No HServerInfo found for" should be a WARNING message
+ [HBASE-5631] - hbck should handle case where .tableinfo file is missing.
+ [HBASE-5714] - Add write permissions check before any hbck run that modifies hdfs.
+ [HBASE-5728] - Methods Missing in HTableInterface
+ [HBASE-6286] - Upgrade maven-compiler-plugin to 2.5.1
+ [HBASE-6291] - Don't retry increments on an invalid cell
+ [HBASE-6308] - Coprocessors should be loaded in a custom ClassLoader to prevent dependency conflicts with HBase
+ [HBASE-6373] - Add more context information to audit log messages
+ [HBASE-6444] - Expose the ability to set custom HTTP Request Headers for the REST client used by RemoteHTable
+ [HBASE-6458] - new comparator twice in checkAndPut, just reuse the first one
+ [HBASE-6522] - Expose locks and leases to Coprocessors
+ [HBASE-6586] - Quarantine Corrupted HFiles with hbck
+ [HBASE-6643] - Accept encoded region name in compacting/splitting region from shell
+ [HBASE-6644] - HBaseAdmin.createTable should wait more till table is enabled.
+ [HBASE-6860] - [replication] HBASE-6550 is too aggressive, DDOSes .META.
+ [HBASE-6914] - Scans/Gets/Mutations don't give a good error if the table is disabled.
+
+New Feature
+
+ [HBASE-6427] - Pluggable compaction and scan policies via coprocessors
+ [HBASE-6505] - Allow shared RegionObserver state
+ [HBASE-6550] - Refactoring ReplicationSink to make it more responsive of cluster health
+
+Task
+
+ [HBASE-5042] - TestReadWriteConsistencyControl should be renamed
+ [HBASE-6288] - In hbase-daemons.sh, description of the default backup-master file path is wrong
+ [HBASE-6538] - Remove copy_table.rb script
+
+Test
+
+ [HBASE-6507] - [hbck] TestHBaseFsck ran into TableNotEnabledException
+ [HBASE-6593] - TestAdmin times out sometimes
+
+
+Release 0.94.1 - 7/24/2012
+Sub-task
+
+ [HBASE-5342] - Grant/Revoke global permissions
+ [HBASE-5372] - Table mutation operations should check table level rights, not global rights
+ [HBASE-5385] - Delete table/column should delete stored permissions on -acl- table
+ [HBASE-5659] - TestAtomicOperation.testMultiRowMutationMultiThreads is still failing occasionally
+ [HBASE-6061] - Fix ACL "Admin" Table inconsistent permission check
+ [HBASE-6062] - preCheckAndPut/Delete() checks for READ when also a WRITE is performed
+ [HBASE-6092] - Authorize flush, split, compact operations in AccessController
+ [HBASE-6157] - Revoke of Global permission is not taking effect without restart.
+ [HBASE-6181] - TestStoreFile fails with jdk1.7
+ [HBASE-6188] - Remove the concept of table owner
+ [HBASE-6209] - ACL Corrections for AccessControllerProtocol apis
+ [HBASE-6224] - add Pre and Post coprocessor hooks for BulkLoad
+ [HBASE-6238] - Grant on META not taking effect
+ [HBASE-6252] - TABLE ADMIN should be allowed to relocate regions
+ [HBASE-6253] - Do not allow user to disable or drop ACL table
+ [HBASE-6292] - Compact can skip the security access control
+ [HBASE-6355] - Allow HBase to compile against JDK7
+
+Bug
+
+ [HBASE-4379] - [hbck] Does not complain about tables with no end region [Z,]
+ [HBASE-4470] - ServerNotRunningException coming out of assignRootAndMeta kills the Master
+ [HBASE-4891] - HTable.ClientScanner needs to clone the Scan object
+ [HBASE-5546] - Master assigns region in the original region server when opening region failed
+ [HBASE-5722] - NPE in ZKUtil#getChildDataAndWatchForNewChildren when ZK not available or NW down.
+ [HBASE-5733] - AssignmentManager#processDeadServersAndRegionsInTransition can fail with NPE.
+ [HBASE-5741] - ImportTsv does not check for table existence
+ [HBASE-5757] - TableInputFormat should handle as many errors as possible
+ [HBASE-5806] - Handle split region related failures on master restart and RS restart
+ [HBASE-5840] - Open Region FAILED_OPEN doesn't clear the TaskMonitor Status, keeps showing the old status
+ [HBASE-5853] - java.lang.RuntimeException: readObject can't find class org.apache.hadoop.hdfs.protocol.HdfsFileStatus
+ [HBASE-5874] - When 'fs.default.name' not configured, the hbck tool and Merge tool throw IllegalArgumentException.
+ [HBASE-5875] - Process RIT and Master restart may remove an online server considering it as a dead server
+ [HBASE-5876] - TestImportExport has been failing against hadoop 0.23 profile
+ [HBASE-5883] - Backup master is going down due to connection refused exception
+ [HBASE-5894] - Table deletion failed but HBaseAdmin#deletetable reports it as success
+ [HBASE-5902] - Some scripts are not executable
+ [HBASE-5909] - SlabStats should be a daemon thread
+ [HBASE-5916] - RS restart just before master initialization makes the cluster non-operative
+ [HBASE-5918] - Master will block forever at startup if root server dies between assigning root and assigning meta
+ [HBASE-5922] - HalfStoreFileReader seekBefore causes StackOverflowError
+ [HBASE-5927] - SSH and DisableTableHandler happening together does not clear the znode of the region and RIT map.
+ [HBASE-5928] - Hbck shouldn't npe when there are no tables.
+ [HBASE-5955] - Guava 11 drops MapEvictionListener and Hadoop 2.0.0-alpha requires it
+ [HBASE-5963] - ClassCastException: FileSystem$Cache$ClientFinalizer cannot be cast to Thread
+ [HBASE-5964] - HFileSystem: "No FileSystem for scheme: hdfs"
+ [HBASE-5966] - MapReduce based tests broken on Hadoop 2.0.0-alpha
+ [HBASE-5975] - Failed suppression of fs shutdown hook with Hadoop 2.0.0
+ [HBASE-5986] - Clients can see holes in the META table when regions are being split
+ [HBASE-6002] - Possible chance of resource leak in HLogSplitter
+ [HBASE-6011] - Unable to start master in local mode
+ [HBASE-6016] - ServerShutdownHandler#processDeadRegion could return false for disabling table regions
+ [HBASE-6018] - hbck fails with a RejectedExecutionException when >50 regions present
+ [HBASE-6021] - NullPointerException when running LoadTestTool without specifying compression type
+ [HBASE-6029] - HBCK doesn't recover Balance switch if exception occurs in onlineHbck()
+ [HBASE-6046] - Master retry on ZK session expiry causes inconsistent region assignments.
+ [HBASE-6047] - Put.has() can't determine result correctly
+ [HBASE-6049] - Serializing "List" containing null elements will cause NullPointerException in HbaseObjectWritable.writeObject()
+ [HBASE-6050] - HLogSplitter renaming recovered.edits and CJ removing the parent directory race, making the HBCK think cluster is inconsistent.
+ [HBASE-6056] - Restore hbase-default version check
+ [HBASE-6065] - Log for flush would append a non-sequential edit in the hlog, leading to possible data loss
+ [HBASE-6068] - Secure HBase cluster : Client not able to call some admin APIs
+ [HBASE-6069] - TableInputFormatBase#createRecordReader() doesn't initialize TableRecordReader which causes NPE
+ [HBASE-6070] - AM.nodeDeleted and SSH races creating problems for regions under SPLIT
+ [HBASE-6088] - Region splitting not happened for long time due to ZK exception while creating RS_ZK_SPLITTING node
+ [HBASE-6089] - SSH and AM.joinCluster causes Concurrent Modification exception.
+ [HBASE-6095] - ActiveMasterManager NullPointerException
+ [HBASE-6115] - NullPointerException is thrown when root and meta table regions are assigning to another RS.
+ [HBASE-6122] - Backup master does not become Active master after ZK exception
+ [HBASE-6126] - Fix broke TestLocalHBaseCluster in 0.92/0.94
+ [HBASE-6133] - TestRestartCluster failing in 0.92
+ [HBASE-6141] - InterfaceAudience breaks 0.94 on older versions of hadoop
+ [HBASE-6146] - Disabling of Catalog tables should not be allowed
+ [HBASE-6158] - Data loss if the words 'merges' or 'splits' are used as Column Family name
+ [HBASE-6160] - META entries from daughters can be deleted before parent entries
+ [HBASE-6164] - Correct the bug in block encoding usage in bulkload
+ [HBASE-6185] - Update javadoc for ConstantSizeRegionSplitPolicy class
+ [HBASE-6195] - Increment data will be lost when the memstore is flushed
+ [HBASE-6200] - KeyComparator.compareWithoutRow can be wrong when families have the same prefix
+ [HBASE-6210] - Backport HBASE-6197 to 0.94
+ [HBASE-6227] - SSH and cluster startup causes data loss
+ [HBASE-6229] - AM.assign() should not set table state to ENABLED directly.
+ [HBASE-6236] - Offline meta repair fails if the HBase base mount point is on a different cluster/volume than its parent in a ViewFS or similar FS
+ [HBASE-6237] - Fix race on ACL table creation in TestTablePermissions
+ [HBASE-6240] - Race in HCM.getMaster stalls clients
+ [HBASE-6246] - Admin.move without specifying destination does not go through AccessController
+ [HBASE-6248] - Jetty init may fail if directory name contains "master"
+ [HBASE-6265] - Calling getTimestamp() on a KV in cp.prePut() causes KV not to be flushed
+ [HBASE-6269] - Lazyseek should use the maxSequenseId StoreFile's KeyValue as the latest KeyValue
+ [HBASE-6281] - Assignment need not be called for disabling table regions during clean cluster start up.
+ [HBASE-6284] - Introduce HRegion#doMiniBatchMutation()
+ [HBASE-6293] - HMaster does not go down while splitting logs even if explicit shutdown is called.
+ [HBASE-6303] - HCD.setCompressionType should use Enum support for storing compression types as strings
+ [HBASE-6311] - Data error after majorCompaction caused by keeping MVCC for opened scanners
+ [HBASE-6313] - Client hangs because the client is not notified
+ [HBASE-6319] - ReplicationSource can call terminate on itself and deadlock
+ [HBASE-6325] - [replication] Race in ReplicationSourceManager.init can initiate a failover even if the node is alive
+ [HBASE-6326] - Avoid nested retry loops in HConnectionManager
+ [HBASE-6328] - FSHDFSUtils#recoverFileLease tries to rethrow InterruptedException but actually swallows it
+ [HBASE-6329] - Stopping META regionserver when splitting region could cause daughter region to be assigned twice
+ [HBASE-6337] - [MTTR] Remove renaming tmp log file in SplitLogManager
+ [HBASE-6357] - Failed distributed log splitting stuck on master web UI
+ [HBASE-6369] - HTable is not closed in AggregationClient
+ [HBASE-6375] - Master may be using a stale list of region servers for creating assignment plan during startup
+ [HBASE-6377] - HBASE-5533 metrics miss all operations submitted via MultiAction
+ [HBASE-6380] - bulkload should update the store.storeSize
+ [HBASE-6392] - UnknownRegionException blocks hbck from sideline big overlap regions
+ [HBASE-6394] - verifyrep MR job map tasks throws NullPointerException
+ [HBASE-6397] - [hbck] print out bulk load commands for sidelined regions if necessary
+ [HBASE-6406] - TestReplicationPeer.testResetZooKeeperSession and TestZooKeeper.testClientSessionExpired fail frequently
+ [HBASE-6420] - Gracefully shutdown logsyncer
+ [HBASE-6426] - Add Hadoop 2.0.x profile to 0.92+
+ [HBASE-6440] - SplitLogManager - log the exception when failed to finish split log file
+ [HBASE-6443] - HLogSplitter should ignore 0 length files
+ [HBASE-6445] - rat check fails if hs_err_pid26514.log dropped in tests
+
+Improvement
+
+ [HBASE-4720] - Implement atomic update operations (checkAndPut, checkAndDelete) for REST client/server
+ [HBASE-5360] - [uberhbck] Add options for how to handle offline split parents.
+ [HBASE-5630] - hbck should disable the balancer using synchronousBalanceSwitch.
+ [HBASE-5802] - Change the default metrics class to NullContextWithUpdateThread
+ [HBASE-5838] - Add an LZ4 compression option to HFile
+ [HBASE-5887] - Make TestAcidGuarantees usable for system testing.
+ [HBASE-5892] - [hbck] Refactor parallel WorkItem* to Futures.
+ [HBASE-5913] - Speed up the full scan of META
+ [HBASE-5973] - Add ability for potentially long-running IPC calls to abort if client disconnects
+ [HBASE-6010] - Security audit logger configuration for log4j
+ [HBASE-6013] - Polish sharp edges from CopyTable
+ [HBASE-6022] - Include Junit in the libs when packaging so that TestAcidGuarantees can run
+ [HBASE-6023] - Normalize security audit logging level with Hadoop
+ [HBASE-6040] - Use block encoding and HBase handled checksum verification in bulk loading using HFileOutputFormat
+ [HBASE-6067] - HBase won't start when hbase.rootdir uses ViewFileSystem
+ [HBASE-6114] - CacheControl flags should be tunable per table schema per CF
+ [HBASE-6124] - Backport HBASE-6033 to 0.90, 0.92 and 0.94
+ [HBASE-6161] - Log Error when thrift server fails to start up.
+ [HBASE-6173] - hbck check specified tables only
+ [HBASE-6207] - Add jitter to client retry timer
+ [HBASE-6214] - Backport HBASE-5998 to 94.1
+ [HBASE-6244] - [REST] Result generators do not need to query table schema
+ [HBASE-6247] - [REST] HTablePool.putTable is deprecated
+ [HBASE-6267] - hbase.store.delete.expired.storefile should be true by default
+ [HBASE-6283] - [region_mover.rb] Add option to exclude list of hosts on unload instead of just assuming the source node.
+ [HBASE-6314] - Fast fail behavior for unauthenticated user
+ [HBASE-6332] - Improve POM for better integration with downstream ivy projects
+ [HBASE-6334] - TestImprovement for TestHRegion.testWritesWhileGetting
+ [HBASE-6341] - Publicly expose HConnectionKey
+ [HBASE-6363] - HBaseConfiguration can carry a main method that dumps XML output for debug purposes
+ [HBASE-6382] - Upgrade Jersey to 1.8 to match Hadoop 1 and 2
+ [HBASE-6384] - hbck should group together those sidelined regions need to be bulk loaded later
+ [HBASE-6433] - Improve HBaseServer#getRemoteAddress by utilizing HBaseServer.Connection.hostAddress
+
+New Feature
+
+ [HBASE-2730] - Expose RS work queue contents on web UI
+ [HBASE-4956] - Control direct memory buffer consumption by HBaseClient
+ [HBASE-5609] - Add the ability to pass additional information for slow query logging
+ [HBASE-5886] - Add new metric for possible data loss due to puts without WAL
+ [HBASE-6044] - copytable: remove rs.* parameters
+
+Task
+
+ [HBASE-6001] - Upgrade slf4j to 1.6.1
+ [HBASE-6034] - Upgrade Hadoop dependencies
+ [HBASE-6077] - Document the most common secure RPC troubleshooting resolutions
+ [HBASE-6129] - Backport of Add Increment Coalescing in thrift.
+ [HBASE-6131] - Add attribution for code added by HBASE-5533 metrics
+
+Test
+
+ [HBASE-5985] - TestMetaMigrationRemovingHTD failed with HADOOP 2.0.0
+
+
+Release 0.94.0 - 5/1/2012
+Sub-task
+
+ [HBASE-4343] - Get the TestAcidGuarantee unit test to fail consistently
+ [HBASE-4345] - Ensure that Scanners that read from the storefiles respect MVCC
+ [HBASE-4346] - Optimise the storage that we use for storing MVCC information.
+ [HBASE-4485] - Eliminate window of missing Data
+ [HBASE-4517] - Document new replication features in 0.92
+ [HBASE-4544] - Rename RWCC to MVCC
+ [HBASE-4594] - Ensure that KV's newer than the oldest-living-scanner is not accounted for the maxVersions during flush/compaction.
+ [HBASE-4661] - Ability to export the list of files for some or all column families for a given region
+ [HBASE-4682] - Support deleted rows using Import/Export
+ [HBASE-4908] - HBase cluster test tool (port from 0.89-fb)
+ [HBASE-4911] - Clean shutdown
+ [HBASE-4979] - Setting KEEP_DELETE_CELLS fails in shell
+ [HBASE-4981] - add raw scan support to shell
+ [HBASE-4998] - Support deleted rows in CopyTable
+ [HBASE-5005] - Add DEFAULT_MIN_VERSIONS to HColumnDescriptor.DEFAULT_VALUES
+ [HBASE-5058] - Allow HBaseAdmin to use an existing connection
+ [HBASE-5096] - Replication does not handle deletes correctly.
+ [HBASE-5118] - Fix Scan documentation
+ [HBASE-5143] - Fix config typo in pluggable load balancer factory
+ [HBASE-5203] - Group atomic put/delete operation into a single WALEdit to handle region server failures.
+ [HBASE-5266] - Add documentation for ColumnRangeFilter
+ [HBASE-5346] - Fix testColumnFamilyCompression and test_TIMERANGE in TestHFileOutputFormat
+ [HBASE-5368] - Move PrefixSplitKeyPolicy out of the src/test into src, so it is accessible in HBase installs
+ [HBASE-5371] - Introduce AccessControllerProtocol.checkPermissions(Permission[] permissons) API
+ [HBASE-5413] - Rename RowMutation to RowMutations
+ [HBASE-5431] - Improve delete marker handling in Import M/R jobs
+ [HBASE-5460] - Add protobuf as M/R dependency jar
+ [HBASE-5497] - Add protobuf as M/R dependency jar (mapred)
+ [HBASE-5523] - Fix Delete Timerange logic for KEEP_DELETED_CELLS
+ [HBASE-5541] - Avoid holding the rowlock during HLog sync in HRegion.mutateRowWithLocks
+ [HBASE-5638] - Backport to 0.90 and 0.92 - NPE reading ZK config in HBase
+ [HBASE-5641] - decayingSampleTick1 prevents HBase from shutting down.
+ [HBASE-5793] - TestHBaseFsck#TestNoHdfsTable test hangs after client retries increased
+
+Bug
+
+ [HBASE-2856] - TestAcidGuarantee broken on trunk
+ [HBASE-3443] - ICV optimization to look in memstore first and then store files (HBASE-3082) does not work when deletes are in the mix
+ [HBASE-3690] - Option to Exclude Bulk Import Files from Minor Compaction
+ [HBASE-3987] - Fix a NullPointerException on a failure to load Bloom filter data
+ [HBASE-4065] - TableOutputFormat ignores failure to create table instance
+ [HBASE-4078] - Silent Data Offlining During HDFS Flakiness
+ [HBASE-4105] - Stargate does not support Content-Type: application/json and Content-Encoding: gzip in parallel
+ [HBASE-4116] - [stargate] StringIndexOutOfBoundsException in row spec parse
+ [HBASE-4326] - Tests that use HBaseTestingUtility.startMiniCluster(n) should shutdown with HBaseTestingUtility.shutdownMiniCluster.
+ [HBASE-4397] - -ROOT-, .META. tables stay offline for too long in recovery phase after all RSs are shutdown at the same time
+ [HBASE-4398] - If HRegionPartitioner is used in MapReduce, client side configurations are overwritten by hbase-site.xml.
+ [HBASE-4476] - Compactions must fail if column tracker gets columns out of order
+ [HBASE-4496] - HFile V2 does not honor setCacheBlocks when scanning.
+ [HBASE-4607] - Split log worker should terminate properly when waiting for znode
+ [HBASE-4609] - ThriftServer.getRegionInfo() is expecting old ServerName format, need to use new Addressing class instead
+ [HBASE-4610] - Port HBASE-3380 (Master failover can split logs of live servers) to 92/trunk (definitely bring in config params, decide if we need to do more to fix the bug)
+ [HBASE-4626] - Filters unnecessarily copy byte arrays...
+ [HBASE-4645] - Edits Log recovery losing data across column families
+ [HBASE-4648] - Bytes.toBigDecimal() doesn't use offset
+ [HBASE-4658] - Put attributes are not exposed via the ThriftServer
+ [HBASE-4673] - NPE in HFileReaderV2.close during major compaction when hfile.block.cache.size is set to 0
+ [HBASE-4679] - Thrift null mutation error
+ [HBASE-4691] - Remove more unnecessary byte[] copies from KeyValues
+ [HBASE-4729] - Clash between region unassign and splitting kills the master
+ [HBASE-4745] - LRU Statistics thread should be daemon
+ [HBASE-4769] - Abort RegionServer Immediately on OOME
+ [HBASE-4776] - HLog.closed should be checked inside of updateLock
+ [HBASE-4778] - Don't ignore corrupt StoreFiles when opening a region
+ [HBASE-4790] - Occasional TestDistributedLogSplitting failure
+ [HBASE-4792] - SplitRegionHandler doesn't care if it deletes the znode or not, leaves the parent region stuck offline
+ [HBASE-4795] - Fix TestHFileBlock when running on a 32-bit JVM
+ [HBASE-4797] - [availability] Skip recovered.edits files with edits we know older than what region currently has
+ [HBASE-4805] - Allow better control of resource consumption in HTable
+ [HBASE-4819] - TestShell broke in trunk; typo
+ [HBASE-4825] - TestRegionServersMetrics and TestZKLeaderManager are not categorized (small/medium/large)
+ [HBASE-4826] - Modify hbasetests.sh to take into account the new pom.xml with surefire
+ [HBASE-4832] - TestRegionServerCoprocessorExceptionWithAbort fails if the region server stops too fast
+ [HBASE-4853] - HBASE-4789 does overzealous pruning of seqids
+ [HBASE-4874] - Run tests with non-secure random, some tests hang otherwise
+ [HBASE-4878] - Master crash when splitting hlog may cause data loss
+ [HBASE-4886] - truncate fails in HBase shell
+ [HBASE-4890] - fix possible NPE in HConnectionManager
+ [HBASE-4932] - Block cache can be mistakenly instantiated by tools
+ [HBASE-4936] - Cached HRegionInterface connections crash when getting UnknownHost exceptions
+ [HBASE-4937] - Error in Quick Start Shell Exercises
+ [HBASE-4942] - HMaster is unable to start if HFile V1 is used
+ [HBASE-4946] - HTable.coprocessorExec (and possibly coprocessorProxy) does not work with dynamically loaded coprocessors (from hdfs or local system), because the RPC system tries to deserialize an unknown class.
+ [HBASE-4993] - Performance regression in minicluster creation
+ [HBASE-5003] - If the master is started with a wrong root dir, it gets stuck and can't be killed
+ [HBASE-5010] - Filter HFiles based on TTL
+ [HBASE-5015] - Remove some leaks in tests due to lack of HTable.close()
+ [HBASE-5026] - Add coprocessor hook to HRegionServer.ScannerListener.leaseExpired()
+ [HBASE-5027] - HConnection.create(final Connection conf) does not clone, it creates a new Configuration reading *.xmls and then does a merge.
+ [HBASE-5038] - Some tests leak connections
+ [HBASE-5041] - Major compaction on non existing table does not throw error
+ [HBASE-5051] - HBaseTestingUtility#getHBaseAdmin() creates a new HBaseAdmin instance at each call
+ [HBASE-5053] - HCM Tests leak connections
+ [HBASE-5055] - Build against hadoop 0.22 broken
+ [HBASE-5068] - RC1 can not build its hadoop-0.23 profile
+ [HBASE-5085] - fix test-patch script from setting the ulimit
+ [HBASE-5088] - A concurrency issue on SoftValueSortedMap
+ [HBASE-5091] - [replication] Update replication doc to reflect current znode structure
+ [HBASE-5097] - RegionObserver implementation whose preScannerOpen and postScannerOpen Impl return null can stall the system initialization through NPE
+ [HBASE-5099] - ZK event thread waiting for root region assignment may block server shutdown handler for the region server the root region was on
+ [HBASE-5100] - Rollback of split could cause closed region to be opened again
+ [HBASE-5103] - Fix improper master znode deserialization
+ [HBASE-5120] - Timeout monitor races with table disable handler
+ [HBASE-5121] - MajorCompaction may affect scan's correctness
+ [HBASE-5141] - Memory leak in MonitoredRPCHandlerImpl
+ [HBASE-5152] - Region is in service before completing initialization when doing rollback of split, which will affect read correctness
+ [HBASE-5163] - TestLogRolling#testLogRollOnDatanodeDeath fails sometimes on Jenkins or hadoop QA ("The directory is already locked.")
+ [HBASE-5172] - HTableInterface should extend java.io.Closeable
+ [HBASE-5176] - AssignmentManager#getRegion: logging nit adds a redundant '+'
+ [HBASE-5182] - TBoundedThreadPoolServer threadKeepAliveTimeSec is not configured properly
+ [HBASE-5195] - [Coprocessors] preGet hook does not allow overriding or wrapping filter on incoming Get
+ [HBASE-5196] - Failure in region split after PONR could cause region hole
+ [HBASE-5200] - AM.ProcessRegionInTransition() and AM.handleRegion() race thus leaving the region assignment inconsistent
+ [HBASE-5206] - Port HBASE-5155 to 0.92, 0.94, and TRUNK
+ [HBASE-5212] - Fix test TestTableMapReduce against 0.23.
+ [HBASE-5213] - "hbase master stop" does not bring down backup masters
+ [HBASE-5221] - bin/hbase script doesn't look for Hadoop jars in the right place in trunk layout
+ [HBASE-5228] - [REST] Rip out "transform" feature
+ [HBASE-5267] - Add a configuration to disable the slab cache by default
+ [HBASE-5271] - Result.getValue and Result.getColumnLatest return the wrong column.
+ [HBASE-5278] - HBase shell script refers to removed "migrate" functionality
+ [HBASE-5281] - Should a failure in creating an unassigned node abort the master?
+ [HBASE-5282] - Possible file handle leak with truncated HLog file.
+ [HBASE-5283] - Request counters may become negative for heavily loaded regions
+ [HBASE-5286] - bin/hbase's logic of adding Hadoop jar files to the classpath is fragile when presented with split packaged Hadoop 0.23 installation
+ [HBASE-5288] - Security source code dirs missing from 0.92.0 release tarballs.
+ [HBASE-5290] - [FindBugs] Synchronization on boxed primitive
+ [HBASE-5292] - getsize per-CF metric incorrectly counts compaction related reads as well
+ [HBASE-5317] - Fix TestHFileOutputFormat to work against hadoop 0.23
+ [HBASE-5327] - Print a message when an invalid hbase.rootdir is passed
+ [HBASE-5331] - Off by one bug in util.HMerge
+ [HBASE-5345] - CheckAndPut doesn't work when value is empty byte[]
+ [HBASE-5348] - Constraint configuration loaded with bloat
+ [HBASE-5350] - Fix jamon generated package names
+ [HBASE-5351] - hbase completebulkload to a new table fails in a race
+ [HBASE-5364] - Fix source files missing licenses in 0.92 and trunk
+ [HBASE-5384] - Up heap used by hadoopqa
+ [HBASE-5387] - Reuse compression streams in HFileBlock.Writer
+ [HBASE-5398] - HBase shell disable_all/enable_all/drop_all prompt wrong tables for confirmation
+ [HBASE-5415] - FSTableDescriptors should handle random folders in hbase.root.dir better
+ [HBASE-5420] - TestImportTsv does not shut down MR Cluster correctly (fails against 0.23 hadoop)
+ [HBASE-5423] - Regionserver may block forever on waitOnAllRegionsToClose when aborting
+ [HBASE-5425] - Punt on the timeout doesn't work in BulkEnabler#waitUntilDone (master's EnableTableHandler)
+ [HBASE-5437] - HRegionThriftServer does not start because of a bug in HbaseHandlerMetricsProxy
+ [HBASE-5466] - Opening a table also opens the metatable and never closes it.
+ [HBASE-5470] - Make DataBlockEncodingTool work correctly with no native compression codecs loaded
+ [HBASE-5473] - Metrics does not push pread time
+ [HBASE-5477] - Cannot build RPM for hbase-0.92.0
+ [HBASE-5480] - Fixups to MultithreadedTableMapper for Hadoop 0.23.2+
+ [HBASE-5481] - Uncaught UnknownHostException prevents HBase from starting
+ [HBASE-5484] - Spelling mistake in error message in HMasterCommandLine
+ [HBASE-5485] - LogCleaner refers to non-existant SnapshotLogCleaner
+ [HBASE-5488] - OfflineMetaRepair doesn't support hadoop 0.20's fs.default.name property
+ [HBASE-5499] - dev-support/test-patch.sh does not have execute perms
+ [HBASE-5502] - region_mover.rb fails to load regions back to original server for regions only containing empty tables.
+ [HBASE-5507] - ThriftServerRunner.HbaseHandler.getRegionInfo() and getTableRegions() do not use ByteBuffer correctly
+ [HBASE-5514] - Compile against hadoop 0.24-SNAPSHOT
+ [HBASE-5522] - hbase 0.92 test artifacts are missing from Maven central
+ [HBASE-5524] - Add a couple of more filters to our rat exclusion set
+ [HBASE-5529] - MR test failures because MALLOC_ARENA_MAX is not set
+ [HBASE-5531] - Maven hadoop profile (version 23) needs to be updated with latest 23 snapshot
+ [HBASE-5535] - Make the functions in task monitor synchronized
+ [HBASE-5537] - MXBean shouldn't have a dependence on InterfaceStability until 0.96
+ [HBASE-5545] - region can't be opened for a long time because creating the file failed.
+ [HBASE-5552] - Clean up our jmx view; its a bit of a mess
+ [HBASE-5562] - test-patch.sh reports a javadoc warning when there are no new javadoc warnings
+ [HBASE-5563] - HRegionInfo#compareTo should compare regionId as well
+ [HBASE-5567] - test-patch.sh has logic error in findbugs check
+ [HBASE-5568] - Multi concurrent flushcache() for one region could cause data loss
+ [HBASE-5569] - Do not collect deleted KVs when they are still in use by a scanner.
+ [HBASE-5574] - DEFAULT_MAX_FILE_SIZE defaults to a negative value
+ [HBASE-5579] - A Delete Version could mask other values
+ [HBASE-5581] - Creating a table with invalid syntax does not give an error message when it fails
+ [HBASE-5586] - [replication] NPE in ReplicationSource when creating a stream to an inexistent cluster
+ [HBASE-5596] - Few minor bugs from HBASE-5209
+ [HBASE-5597] - Findbugs check in test-patch.sh always fails
+ [HBASE-5603] - rolling-restart.sh script hangs when attempting to detect expiration of /hbase/master znode.
+ [HBASE-5606] - SplitLogManager async delete node hangs log splitting when ZK connection is lost
+ [HBASE-5611] - Replayed edits from regions that failed to open during recovery aren't removed from the global MemStore size
+ [HBASE-5613] - ThriftServer getTableRegions does not return serverName and port
+ [HBASE-5623] - Race condition when rolling the HLog and hlogFlush
+ [HBASE-5624] - Aborting regionserver when splitting region, may cause daughter region not assigned by ServerShutdownHandler.
+ [HBASE-5633] - NPE reading ZK config in HBase
+ [HBASE-5635] - If getTaskList() returns null, splitlogWorker would go down and it won't serve any requests
+ [HBASE-5636] - TestTableMapReduce doesn't work properly.
+ [HBASE-5639] - The logic used in waiting for region servers during startup is broken
+ [HBASE-5656] - LoadIncrementalHFiles createTable should detect and set compression algorithm
+ [HBASE-5663] - MultithreadedTableMapper doesn't work.
+ [HBASE-5665] - Repeated split causes HRegionServer failures and breaks table
+ [HBASE-5669] - AggregationClient fails validation for open stoprow scan
+ [HBASE-5680] - Improve compatibility warning about HBase with Hadoop 0.23.x
+ [HBASE-5689] - Skipping RecoveredEdits may cause data loss
+ [HBASE-5690] - compression does not work in Store.java of 0.94
+ [HBASE-5694] - getRowsWithColumnsTs() in Thrift service handles timestamps incorrectly
+ [HBASE-5701] - Put RegionServerDynamicStatistics under RegionServer in MBean hierarchy rather than have it as a peer.
+ [HBASE-5717] - Scanner metrics are only reported if you get to the end of a scanner
+ [HBASE-5720] - HFileDataBlockEncoderImpl uses wrong header size when reading HFiles with no checksums
+ [HBASE-5722] - NPE in ZKUtil#getChildDataAndWatchForNewChildren when ZK not available or NW down.
+ [HBASE-5724] - Row cache of KeyValue should be cleared in readFields().
+ [HBASE-5736] - ThriftServerRunner.HbaseHandler.mutateRow() does not use ByteBuffer correctly
+ [HBASE-5743] - Support GIT patches
+ [HBASE-5773] - HTablePool constructor not reading config files in certain cases
+ [HBASE-5780] - Fix race in HBase regionserver startup vs ZK SASL authentication
+ [HBASE-5781] - Zookeeper session got closed while trying to assign the region to RS using hbck -fix
+ [HBASE-5782] - Edits can be appended out of seqid order since HBASE-4487
+ [HBASE-5787] - Table owner can't disable/delete his/her own table
+ [HBASE-5795] - HServerLoad$RegionLoad breaks 0.92<->0.94 compatibility
+ [HBASE-5825] - TestHLog not running any tests; fix
+ [HBASE-5833] - 0.92 build has been failing pretty consistently on TestMasterFailover....
+ [HBASE-5848] - Create table with EMPTY_START_ROW passed as splitKey causes the HMaster to abort
+ [HBASE-5849] - On first cluster startup, RS aborts if root znode is not available
+ [HBASE-5850] - Refuse operations from Admin before master is initialized - fix for all branches.
+ [HBASE-5857] - RIT map in RS not getting cleared while region opening
+ [HBASE-5861] - Hadoop 23 compilation broken due to tests introduced in HBASE-5604
+ [HBASE-5864] - Error while reading from hfile in 0.94
+ [HBASE-5865] - test-util.sh broken with unittest updates
+ [HBASE-5866] - Canary in tool package but says it's in tools.
+ [HBASE-5871] - Usability regression, we don't parse compression algos anymore
+ [HBASE-5873] - TimeOut Monitor thread should be started after at least one region server registers.
+ [HBASE-5884] - MapReduce package info has broken link to bulk-loads
+ [HBASE-5885] - Invalid HFile block magic on Local file System
+ [HBASE-5893] - Allow spaces in coprocessor conf (aka trim() className)
+ [HBASE-5897] - prePut coprocessor hook causing substantial CPU usage
+ [HBASE-5908] - TestHLogSplit.testTralingGarbageCorruptionFileSkipErrorsPasses should not use append to corrupt the HLog
+ [HBASE-6265] - Calling getTimestamp() on a KV in cp.prePut() causes KV not to be flushed
+ [HBASE-6357] - Failed distributed log splitting stuck on master web UI
+
+Improvement
+
+ [HBASE-1744] - Thrift server to match the new java api.
+ [HBASE-2418] - add support for ZooKeeper authentication
+ [HBASE-3373] - Allow regions to be load-balanced by table
+ [HBASE-3433] - Remove the KV copy of every KV in Scan; introduced by HBASE-3232
+ [HBASE-3512] - Coprocessors: Shell support for listing currently loaded coprocessor set
+ [HBASE-3565] - Add metrics to keep track of slow HLog appends
+ [HBASE-3763] - Add Bloom Block Index Support
+ [HBASE-3850] - Log more details when a scanner lease expires
+ [HBASE-3924] - Improve Shell's CLI help
+ [HBASE-3949] - Add "Master" link to RegionServer pages
+ [HBASE-4058] - Extend TestHBaseFsck with a complete .META. recovery scenario
+ [HBASE-4062] - Multi-column scanner unit test
+ [HBASE-4070] - [Coprocessors] Improve region server metrics to report loaded coprocessors to master
+ [HBASE-4076] - hbase should pick up HADOOP_CONF_DIR on its classpath
+ [HBASE-4131] - Make the Replication Service pluggable via a standard interface definition
+ [HBASE-4132] - Extend the WALActionsListener API to accommodate log archival
+ [HBASE-4145] - Provide metrics for hbase client
+ [HBASE-4213] - Support for fault tolerant, instant schema updates without master's intervention (i.e. without enable/disable and bulk assign/unassign) through ZK.
+ [HBASE-4218] - Data Block Encoding of KeyValues (aka delta encoding / prefix compression)
+ [HBASE-4365] - Add a decent heuristic for region size
+ [HBASE-4418] - Show all the hbase configuration in the web ui
+ [HBASE-4439] - Move ClientScanner out of HTable
+ [HBASE-4440] - add an option to presplit table to PerformanceEvaluation
+ [HBASE-4461] - Expose getRowOrBefore via Thrift
+ [HBASE-4463] - Run more aggressive compactions during off peak hours
+ [HBASE-4465] - Lazy-seek optimization for StoreFile scanners
+ [HBASE-4469] - Avoid top row seek by looking up ROWCOL bloomfilter
+ [HBASE-4480] - Testing script to simplify local testing
+ [HBASE-4487] - The increment operation can release the rowlock before sync-ing the Hlog
+ [HBASE-4489] - Better key splitting in RegionSplitter
+ [HBASE-4519] - 25s sleep when expiring sessions in tests
+ [HBASE-4522] - Make hbase-site-custom.xml override the hbase-site.xml
+ [HBASE-4528] - The put operation can release the rowlock before sync-ing the Hlog
+ [HBASE-4532] - Avoid top row seek by dedicated bloom filter for delete family bloom filter
+ [HBASE-4542] - add filter info to slow query logging
+ [HBASE-4554] - Allow set/unset coprocessor table attributes from shell.
+ [HBASE-4568] - Make zk dump jsp response more quickly
+ [HBASE-4585] - Avoid next operations (and instead reseek) when current kv is deleted
+ [HBASE-4591] - TTL for old HLogs should be calculated from last modification time.
+ [HBASE-4612] - Allow ColumnPrefixFilter to support multiple prefixes
+ [HBASE-4627] - Ability to specify a custom start/end to RegionSplitter
+ [HBASE-4628] - Enhance Table Create Presplit Functionality within the HBase Shell
+ [HBASE-4640] - Catch ClosedChannelException and document it
+ [HBASE-4657] - Improve the efficiency of our MR jobs with a few configurations
+ [HBASE-4669] - Add an option of using round-robin assignment for enabling table
+ [HBASE-4696] - HRegionThriftServer might have to do redirects indefinitely
+ [HBASE-4704] - A JRuby script for identifying active master
+ [HBASE-4737] - Categorize the tests into small/medium/large; allow small tests to be run in parallel within a single JVM
+ [HBASE-4746] - Use a random ZK client port in unit tests so we can run them in parallel
+ [HBASE-4752] - Don't create an unnecessary LinkedList when evicting from the BlockCache
+ [HBASE-4760] - Add Developer Debug Options to HBase Config
+ [HBASE-4761] - Add Developer Debug Options to HBase Config
+ [HBASE-4764] - naming errors for TestHLogUtils and SoftValueSortedMapTest
+ [HBASE-4779] - TestHTablePool, TestScanWithBloomError, TestRegionSplitCalculator are not tagged and TestPoolMap should not use TestSuite
+ [HBASE-4780] - Lower mini cluster shutdown time in HRegionServer#waitOnAllRegionsToClose and ServerManager#letRegionServersShutdown
+ [HBASE-4781] - Pom update to use the new versions of surefire & junit
+ [HBASE-4783] - Improve RowCounter to count rows in a specific key range.
+ [HBASE-4787] - Make corePool as a configurable parameter in HTable
+ [HBASE-4798] - Sleeps and synchronisation improvements for tests
+ [HBASE-4809] - Per-CF set RPC metrics
+ [HBASE-4820] - Distributed log splitting coding enhancement to make it easier to understand, no semantics change
+ [HBASE-4847] - Activate single jvm for small tests on jenkins
+ [HBASE-4863] - Make Thrift server thread pool bounded and add a command-line UI test
+ [HBASE-4884] - Allow environment overrides for various HBase processes
+ [HBASE-4933] - Ability to calculate the blockcache hit ratio for the last few minutes
+ [HBASE-4938] - Create a HRegion.getScanner public method that allows reading from a specified readPoint
+ [HBASE-4940] - hadoop-metrics.properties can include configuration of the "rest" context for ganglia
+ [HBASE-4957] - Clean up some log messages, code in RecoverableZooKeeper
+ [HBASE-4964] - Add builddate, make less sections in toc, and add header and footer customizations
+ [HBASE-4965] - Monitor the open file descriptors and the threads counters during the unit tests
+ [HBASE-4970] - Add a parameter so that keepAliveTime of Htable thread pool can be changed
+ [HBASE-4971] - Useless sleeps in TestTimestampsFilter and TestMultipleTimestamps
+ [HBASE-4973] - On failure, HBaseAdmin sleeps one time too many
+ [HBASE-4989] - Metrics to measure sequential reads and random reads separately
+ [HBASE-4995] - Increase zk maxClientCnxns to give us some head room
+ [HBASE-5014] - PutSortReducer should adhere to memory limits
+ [HBASE-5017] - Bump the default hfile.block.cache.size because of HFileV2
+ [HBASE-5021] - Enforce upper bound on timestamp
+ [HBASE-5033] - Opening/Closing store in parallel to reduce region open/close time
+ [HBASE-5064] - utilize surefire tests parallelization
+ [HBASE-5072] - Support Max Value for Per-Store Metrics
+ [HBASE-5074] - support checksums in HBase block cache
+ [HBASE-5134] - Remove getRegionServerWithoutRetries and getRegionServerWithRetries from HConnection Interface
+ [HBASE-5166] - MultiThreaded Table Mapper analogous to MultiThreaded Mapper in hadoop
+ [HBASE-5167] - We shouldn't be injecting 'Killing [daemon]' into logs, when we aren't doing that.
+ [HBASE-5186] - Add metrics to ThriftServer
+ [HBASE-5189] - Add metrics to keep track of region-splits in RS
+ [HBASE-5190] - Limit the IPC queue size based on calls' payload size
+ [HBASE-5193] - Use TBoundedThreadPoolServer in HRegionThriftServer
+ [HBASE-5197] - [replication] Handle socket timeouts in ReplicationSource to prevent DDOS
+ [HBASE-5199] - Delete out of TTL store files before compaction selection
+ [HBASE-5201] - Utilize TThreadedSelectorServer and remove redundant code in ThriftServer and HRegionThriftServer
+ [HBASE-5209] - HConnection/HMasterInterface should allow for way to get hostname of currently active master in multi-master HBase setup
+ [HBASE-5246] - Regenerate code with thrift 0.8.0
+ [HBASE-5255] - Use singletons for OperationStatus to save memory
+ [HBASE-5259] - Normalize the RegionLocation in TableInputFormat by the reverse DNS lookup.
+ [HBASE-5297] - Update metrics numOpenConnections and callQueueLen directly in HBaseServer
+ [HBASE-5298] - Add thrift metrics to thrift2
+ [HBASE-5304] - Pluggable split key policy
+ [HBASE-5310] - HConnectionManager server cache key enhancement
+ [HBASE-5325] - Expose basic information about the master-status through jmx beans
+ [HBASE-5332] - Deterministic Compaction Jitter
+ [HBASE-5358] - HBaseObjectWritable should be able to serialize/deserialize generic arrays
+ [HBASE-5363] - Automatically run rat check on mvn release builds
+ [HBASE-5388] - Tune HConnectionManager#getCachedLocation method
+ [HBASE-5393] - Consider splitting after flushing
+ [HBASE-5394] - Add ability to include Protobufs in HbaseObjectWritable
+ [HBASE-5395] - CopyTable needs to use GenericOptionsParser
+ [HBASE-5411] - Add more metrics for ThriftMetrics
+ [HBASE-5421] - use hadoop-client/hadoop-minicluster artifacts for Hadoop 0.23 build
+ [HBASE-5428] - Allow for custom filters to be registered within the Thrift interface
+ [HBASE-5433] - [REST] Add metrics to keep track of success/failure count
+ [HBASE-5434] - [REST] Include more metrics in cluster status request
+ [HBASE-5436] - Right-size the map when reading attributes.
+ [HBASE-5439] - Fix some performance findbugs issues
+ [HBASE-5440] - Allow Import to optionally use HFileOutputFormat
+ [HBASE-5442] - Use builder pattern in StoreFile and HFile
+ [HBASE-5454] - Refuse operations from Admin before master is initialized
+ [HBASE-5464] - Log warning message when thrift calls throw exceptions
+ [HBASE-5483] - Allow configurable host to bind to for starting REST server from commandline
+ [HBASE-5489] - Add HTable accessor to get regions for a key range
+ [HBASE-5508] - Add an option to allow test output to show on the terminal
+ [HBASE-5520] - Support reseek() at RegionScanner
+ [HBASE-5533] - Add more metrics to HBase
+ [HBASE-5551] - Some functions should not be used by customer code and must be deprecated in 0.94
+ [HBASE-5560] - Avoid RegionServer GC caused by timed-out calls
+ [HBASE-5588] - Deprecate/remove AssignmentManager#clearRegionFromTransition
+ [HBASE-5589] - Add of the offline call to the Master Interface
+ [HBASE-5592] - Make it easier to get a table from shell
+ [HBASE-5618] - SplitLogManager - prevent unnecessary attempts to resubmits
+ [HBASE-5670] - Have Mutation implement the Row interface.
+ [HBASE-5671] - hbase.metrics.showTableName should be true by default
+ [HBASE-5682] - Allow HConnectionImplementation to recover from ZK connection loss (for 0.94 only)
+ [HBASE-5706] - "Dropping fs latency stats since buffer is full" spam
+ [HBASE-5712] - Parallelize load of .regioninfo files in diagnostic/repair portion of hbck.
+ [HBASE-5734] - Change hbck sideline root
+ [HBASE-5735] - Clearer warning message when connecting a non-secure HBase client to a secure HBase server
+ [HBASE-5737] - Minor Improvements related to balancer.
+ [HBASE-5748] - Enable lib directory in jar file for coprocessor
+ [HBASE-5770] - Add a clock skew warning threshold
+ [HBASE-5775] - ZKUtil doesn't handle deleteRecursively cleanly
+ [HBASE-5823] - Hbck should be able to print help
+ [HBASE-5862] - After Region Close remove the Operation Metrics.
+ [HBASE-5863] - Improve the graceful_stop.sh CLI help (especially about reloads)
+ [HBASE-6173] - hbck check specified tables only
+ [HBASE-5360] - [uberhbck] Add options for how to handle offline split parents.
+
+New Feature
+
+ [HBASE-2947] - MultiIncrement/MultiAppend (MultiGet functionality for increments and appends)
+ [HBASE-3134] - [replication] Add the ability to enable/disable streams
+ [HBASE-3584] - Allow atomic put/delete in one call
+ [HBASE-3856] - Build a tree structure data block index inside of the HFile
+ [HBASE-4102] - atomicAppend: A put that appends to the latest version of a cell; i.e. reads current value then adds the bytes offered by the client to the tail and writes out a new entry
+ [HBASE-4219] - Add Per-Column Family Metrics
+ [HBASE-4393] - Implement a canary monitoring program
+ [HBASE-4460] - Support running an embedded ThriftServer within a RegionServer
+ [HBASE-4536] - Allow CF to retain deleted rows
+ [HBASE-4608] - HLog Compression
+ [HBASE-4629] - enable automated patch testing for hbase
+ [HBASE-4683] - Always cache index and bloom blocks
+ [HBASE-4698] - Let the HFile Pretty Printer print all the key values for a specific row.
+ [HBASE-4768] - Per-(table, columnFamily) metrics with configurable table name inclusion
+ [HBASE-5128] - [uber hbck] Online automated repair of table integrity and region consistency problems
+ [HBASE-5177] - HTable needs a non cached version of getRegionLocation
+ [HBASE-5229] - Provide basic building blocks for "multi-row" local transactions.
+ [HBASE-5526] - Configurable file and directory based umask
+ [HBASE-5599] - [hbck] handle NO_VERSION_FILE and SHOULD_NOT_BE_DEPLOYED inconsistencies
+ [HBASE-5604] - M/R tool to replay WAL files
+ [HBASE-5719] - Enhance hbck to sideline overlapped mega regions
+
+Task
+
+ [HBASE-4256] - Intra-row scanning (part deux)
+ [HBASE-4429] - Provide synchronous balanceSwitch()
+ [HBASE-4611] - Add support for Phabricator/Differential as an alternative code review tool
+ [HBASE-4712] - Document rules for writing tests
+ [HBASE-4751] - Make TestAdmin#testEnableTableRoundRobinAssignment friendly to concurrent tests
+ [HBASE-4968] - Add to troubleshooting workaround for direct buffer oome's.
+ [HBASE-5011] - Move test-util.sh from src/test/bin to dev-tools
+ [HBASE-5084] - Allow different HTable instances to share one ExecutorService
+ [HBASE-5111] - Upgrade zookeeper to 3.4.2 release
+ [HBASE-5173] - Commit hbase-4480 findHangingTest.sh script under dev-support
+ [HBASE-5256] - Use WritableUtils.readVInt() in RegionLoad.readFields()
+ [HBASE-5264] - Add 0.92.0 upgrade guide
+ [HBASE-5294] - Make sure javadoc is included in tarball bundle when we release
+ [HBASE-5400] - Some tests does not have annotations for (Small|Medium|Large)Tests
+ [HBASE-5427] - Upgrade our zk to 3.4.3
+ [HBASE-5511] - More doc on maven release process
+ [HBASE-5715] - Revert 'Instant schema alter' for now, HBASE-4213
+ [HBASE-5721] - Update bundled hadoop to be 1.0.2 (it was just released)
+ [HBASE-5758] - Forward port "HBASE-4109 Hostname returned via reverse dns lookup contains trailing period if configured interface is not 'default'"
+ [HBASE-5836] - Backport per region metrics from HBASE-3614 to 0.94.1
+
+Test
+
+ [HBASE-4516] - HFile-level load tester with compaction and random-read workloads
+ [HBASE-4534] - A new unit test for lazy seek and StoreScanner in general
+ [HBASE-4545] - TestHLog doesn't clean up after itself
+ [HBASE-4772] - Utility to Create StoreFiles
+ [HBASE-4808] - Test to Ensure Expired Deletes Don't Override Puts
+ [HBASE-4864] - TestMasterObserver#testRegionTransitionOperations occasionally fails
+ [HBASE-4868] - TestOfflineMetaRebuildBase#testMetaRebuild occasionally fails
+ [HBASE-5150] - Failure in a thread may not fail a test, clean up log splitting test
+ [HBASE-5223] - TestMetaReaderEditor is missing call to CatalogTracker.stop()
+ [HBASE-5455] - Add test to avoid unintentional reordering of items in HbaseObjectWritable
+ [HBASE-5792] - HLog Performance Evaluation Tool
+
Release 0.92.1 - Unreleased
BUG FIXES
diff --git a/LICENSE.txt b/LICENSE.txt
index d64569567334..b94f0c03adf9 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -200,3 +200,72 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+
+----
+This project incorporates portions of the 'Protocol Buffers' project available
+under a '3-clause BSD' license.
+
+ Copyright 2008, Google Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ Code generated by the Protocol Buffer compiler is owned by the owner
+ of the input file used when generating it. This code is not
+ standalone and requires a support library to be linked with it. This
+ support library is itself covered by the above license.
+
+--
+
+This project incorporates part of the 'FreeBSD Documentation Project'
+available under a BSD-style license.
+
+ * Copyright (c) 2001, 2003, 2010 The FreeBSD Documentation Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: doc/share/misc/docbook.css,v 1.15 2010/03/20 04:15:01 hrs Exp $
diff --git a/NOTICE.txt b/NOTICE.txt
index 3ae710800cd6..dcaa169338be 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -1,16 +1,12 @@
-This product includes software developed by The Apache Software
-Foundation (http://www.apache.org/).
+Apache HBase
+Copyright 2007-2015 The Apache Software Foundation
-In addition, this product includes software developed by:
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
-JUnit (http://www.junit.org/) included under the Common Public License v1.0. See
-the full text here: http://junit.sourceforge.net/cpl-v10.html
+--
+This product incorporates portions of the 'Hadoop' project
-JRuby (http://jruby.org) is tri-licensed. We include it under terms of the
-Common Public License v1.0.
+Copyright 2007-2009 The Apache Software Foundation
-JRuby itself includes libraries variously licensed. See its COPYING document
-for details: https://github.com/jruby/jruby/blob/master/COPYING
-
-The JRuby community went out of their way to make JRuby compatible with Apache
-projects: See https://issues.apache.org/jira/browse/HBASE-3374)
+Licensed under the Apache License v2.0
diff --git a/bin/copy_table.rb b/bin/copy_table.rb
deleted file mode 100644
index ad6e70321b82..000000000000
--- a/bin/copy_table.rb
+++ /dev/null
@@ -1,166 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# Script that copies table in hbase. As written, will not work for rare
-# case where there is more than one region in .META. table. Does the
-# update of the hbase .META. and copies the directories in filesystem.
-# HBase MUST be shutdown when you run this script.
-#
-# To see usage for this script, run:
-#
-# ${HBASE_HOME}/bin/hbase org.jruby.Main rename_table.rb
-#
-include Java
-import org.apache.hadoop.hbase.util.MetaUtils
-import org.apache.hadoop.hbase.util.FSUtils
-import org.apache.hadoop.hbase.util.Bytes
-import org.apache.hadoop.hbase.util.Writables
-import org.apache.hadoop.hbase.HConstants
-import org.apache.hadoop.hbase.HBaseConfiguration
-import org.apache.hadoop.hbase.HStoreKey
-import org.apache.hadoop.hbase.HRegionInfo
-import org.apache.hadoop.hbase.HTableDescriptor
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable
-import org.apache.hadoop.hbase.regionserver.HLogEdit
-import org.apache.hadoop.hbase.regionserver.HRegion
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.fs.FileSystem
-import org.apache.hadoop.fs.FileUtil
-import org.apache.commons.logging.Log
-import org.apache.commons.logging.LogFactory
-import java.util.TreeMap
-
-# Name of this script
-NAME = "copy_table"
-
-# Print usage for this script
-def usage
- puts 'Usage: %s.rb ' % NAME
- exit!
-end
-
-# Passed 'dir' exists and is a directory else exception
-def isDirExists(fs, dir)
- raise IOError.new("Does not exit: " + dir.toString()) unless fs.exists(dir)
- raise IOError.new("Not a directory: " + dir.toString()) unless fs.isDirectory(dir)
-end
-
-# Returns true if the region belongs to passed table
-def isTableRegion(tableName, hri)
- return Bytes.equals(hri.getTableDesc().getName(), tableName)
-end
-
-# Create new HRI based off passed 'oldHRI'
-def createHRI(tableName, oldHRI)
- htd = oldHRI.getTableDesc()
- newHtd = HTableDescriptor.new(tableName)
- for family in htd.getFamilies()
- newHtd.addFamily(family)
- end
- return HRegionInfo.new(newHtd, oldHRI.getStartKey(), oldHRI.getEndKey(),
- oldHRI.isSplit())
-end
-
-# Check arguments
-if ARGV.size != 2
- usage
-end
-
-# Check good table names were passed.
-oldTableName = HTableDescriptor.isLegalTableName(ARGV[0].to_java_bytes)
-newTableName = HTableDescriptor.isLegalTableName(ARGV[1].to_java_bytes)
-
-# Get configuration to use.
-c = HBaseConfiguration.new()
-
-# Set hadoop filesystem configuration using the hbase.rootdir.
-# Otherwise, we'll always use localhost though the hbase.rootdir
-# might be pointing at hdfs location.
-c.set("fs.default.name", c.get(HConstants::HBASE_DIR))
-fs = FileSystem.get(c)
-
-# If new table directory does not exit, create it. Keep going if already
-# exists because maybe we are rerunning script because it failed first
-# time.
-rootdir = FSUtils.getRootDir(c)
-oldTableDir = Path.new(rootdir, Path.new(Bytes.toString(oldTableName)))
-isDirExists(fs, oldTableDir)
-newTableDir = Path.new(rootdir, Bytes.toString(newTableName))
-if !fs.exists(newTableDir)
- fs.mkdirs(newTableDir)
-end
-
-# Get a logger and a metautils instance.
-LOG = LogFactory.getLog(NAME)
-utils = MetaUtils.new(c)
-
-# Start. Get all meta rows.
-begin
- # Get list of all .META. regions that contain old table name
- metas = utils.getMETARows(oldTableName)
- index = 0
- for meta in metas
- # For each row we find, move its region from old to new table.
- # Need to update the encoded name in the hri as we move.
- # After move, delete old entry and create a new.
- LOG.info("Scanning " + meta.getRegionNameAsString())
- metaRegion = utils.getMetaRegion(meta)
- scanner = metaRegion.getScanner(HConstants::COL_REGIONINFO_ARRAY, oldTableName,
- HConstants::LATEST_TIMESTAMP, nil)
- begin
- key = HStoreKey.new()
- value = TreeMap.new(Bytes.BYTES_COMPARATOR)
- while scanner.next(key, value)
- index = index + 1
- keyStr = key.toString()
- oldHRI = Writables.getHRegionInfo(value.get(HConstants::COL_REGIONINFO))
- if !oldHRI
- raise IOError.new(index.to_s + " HRegionInfo is null for " + keyStr)
- end
- unless isTableRegion(oldTableName, oldHRI)
- # If here, we passed out the table. Break.
- break
- end
- oldRDir = Path.new(oldTableDir, Path.new(oldHRI.getEncodedName().to_s))
- if !fs.exists(oldRDir)
- LOG.warn(oldRDir.toString() + " does not exist -- region " +
- oldHRI.getRegionNameAsString())
- else
- # Now make a new HRegionInfo to add to .META. for the new region.
- newHRI = createHRI(newTableName, oldHRI)
- newRDir = Path.new(newTableDir, Path.new(newHRI.getEncodedName().to_s))
- # Move the region in filesystem
- LOG.info("Copying " + oldRDir.toString() + " as " + newRDir.toString())
- FileUtil.copy(fs, oldRDir, fs, newRDir, false, true, c)
- # Create 'new' region
- newR = HRegion.new(rootdir, utils.getLog(), fs, c, newHRI, nil)
- # Add new row. NOTE: Presumption is that only one .META. region. If not,
- # need to do the work to figure proper region to add this new region to.
- LOG.info("Adding to meta: " + newR.toString())
- HRegion.addRegionToMETA(metaRegion, newR)
- LOG.info("Done copying: " + Bytes.toString(key.getRow()))
- end
- # Need to clear value else we keep appending values.
- value.clear()
- end
- ensure
- scanner.close()
- end
- end
-ensure
- utils.shutdown()
-end
diff --git a/bin/get-active-master.rb b/bin/get-active-master.rb
index 8887a4574c14..6855cfcd7c40 100644
--- a/bin/get-active-master.rb
+++ b/bin/get-active-master.rb
@@ -1,6 +1,4 @@
#!/usr/bin/env hbase-jruby
-# Copyright 2011 The Apache Software Foundation
-#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with this
# work for additional information regarding copyright ownership. The ASF
diff --git a/bin/graceful_stop.sh b/bin/graceful_stop.sh
old mode 100644
new mode 100755
index cf7bee86ff1d..80461a593851
--- a/bin/graceful_stop.sh
+++ b/bin/graceful_stop.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2011 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -19,16 +17,16 @@
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
-
+
# Move regions off a server then stop it. Optionally restart and reload.
# Turn off the balancer before running this script.
function usage {
- echo "Usage: graceful_stop.sh [--config ] [--restart] [--reload] [--thrift] [--rest] "
+ echo "Usage: graceful_stop.sh [--config ] [--restart [--reload]] [--thrift] [--rest] "
echo " thrift If we should stop/start thrift before/after the hbase stop/start"
echo " rest If we should stop/start rest before/after the hbase stop/start"
echo " restart If we should restart after graceful stop"
- echo " reload Move offloaded regions back on to the stopped server"
- echo " debug Move offloaded regions back on to the stopped server"
+ echo " reload Move offloaded regions back on to the restarted server"
+ echo " debug Print helpful debug information"
echo " hostname Hostname of server we are to stop"
exit 1
}
@@ -69,8 +67,9 @@ fi
hostname=$1
filename="/tmp/$hostname"
# Run the region mover script.
-echo "Disabling balancer!"
-echo 'balance_switch false' | "$bin"/hbase --config ${HBASE_CONF_DIR} shell
+echo "Disabling balancer! (if required)"
+HBASE_BALANCER_STATE=`echo 'balance_switch false' | "$bin"/hbase --config ${HBASE_CONF_DIR} shell | tail -3 | head -1`
+echo "Previous balancer state was $HBASE_BALANCER_STATE"
echo "Unloading $hostname region(s)"
HBASE_NOEXEC=true "$bin"/hbase --config ${HBASE_CONF_DIR} org.jruby.Main "$bin"/region_mover.rb --file=$filename $debug unload $hostname
echo "Unloaded $hostname region(s)"
@@ -100,5 +99,10 @@ if [ "$restart" != "" ]; then
fi
fi
+if [ $HBASE_BALANCER_STATE != "false" ]; then
+ echo "Restoring balancer state to" $HBASE_BALANCER_STATE
+ echo "balance_switch $HBASE_BALANCER_STATE" | "$bin"/hbase --config ${HBASE_CONF_DIR} shell &> /dev/null
+fi
+
# Cleanup tmp files.
trap "rm -f "/tmp/$(basename $0).*.tmp" &> /dev/null" EXIT
diff --git a/bin/hbase b/bin/hbase
index c5692ffb746b..158c7c8fd475 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -1,8 +1,6 @@
#! /usr/bin/env bash
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -31,11 +29,14 @@
#
# HBASE_CLASSPATH Extra Java CLASSPATH entries.
#
+# HBASE_CLASSPATH_PREFIX Extra Java CLASSPATH entries that should be
+# prefixed to the system classpath.
+#
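+#                   For example (illustrative, not part of the original change):
+#                     HBASE_CLASSPATH_PREFIX=/path/to/patched-classes ./bin/hbase shell
+#                   places those entries ahead of everything else on the CLASSPATH.
+#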
# HBASE_HEAPSIZE The maximum amount of heap to use, in MB.
# Default is 1000.
#
# HBASE_LIBRARY_PATH HBase additions to JAVA_LIBRARY_PATH for adding
-# native libaries.
+# native libraries.
#
# HBASE_OPTS Extra Java runtime options.
#
@@ -91,6 +92,7 @@ if [ $# = 0 ]; then
echo ""
echo "PACKAGE MANAGEMENT"
echo " classpath dump hbase CLASSPATH"
+ echo " mapredcp dump CLASSPATH entries required by mapreduce"
echo " version print the version"
echo ""
echo " or"
@@ -186,11 +188,6 @@ for f in $HBASE_HOME/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
-# Add user-specified CLASSPATH last
-if [ "$HBASE_CLASSPATH" != "" ]; then
- CLASSPATH=${CLASSPATH}:${HBASE_CLASSPATH}
-fi
-
# default log directory & file
if [ "$HBASE_LOG_DIR" = "" ]; then
HBASE_LOG_DIR="$HBASE_HOME/logs"
@@ -199,13 +196,6 @@ if [ "$HBASE_LOGFILE" = "" ]; then
HBASE_LOGFILE='hbase.log'
fi
-# cygwin path translation
-if $cygwin; then
- CLASSPATH=`cygpath -p -w "$CLASSPATH"`
- HBASE_HOME=`cygpath -d "$HBASE_HOME"`
- HBASE_LOG_DIR=`cygpath -d "$HBASE_LOG_DIR"`
-fi
-
function append_path() {
if [ -z "$1" ]; then
echo $2
@@ -217,31 +207,31 @@ function append_path() {
JAVA_PLATFORM=""
#If avail, add Hadoop to the CLASSPATH and to the JAVA_LIBRARY_PATH
-if [ ! -z $HADOOP_HOME ]; then
- HADOOPCPPATH=""
- if [ -z $HADOOP_CONF_DIR ]; then
- HADOOPCPPATH=$(append_path "${HADOOPCPPATH}" "${HADOOP_HOME}/conf")
- else
- HADOOPCPPATH=$(append_path "${HADOOPCPPATH}" "${HADOOP_CONF_DIR}")
- fi
- if [ "`echo ${HADOOP_HOME}/hadoop-core*.jar`" != "${HADOOP_HOME}/hadoop-core*.jar" ] ; then
- HADOOPCPPATH=$(append_path "${HADOOPCPPATH}" `ls ${HADOOP_HOME}/hadoop-core*.jar | head -1`)
- else
- HADOOPCPPATH=$(append_path "${HADOOPCPPATH}" `ls ${HADOOP_HOME}/hadoop-common*.jar | head -1`)
- HADOOPCPPATH=$(append_path "${HADOOPCPPATH}" `ls ${HADOOP_HOME}/hadoop-hdfs*.jar | head -1`)
- HADOOPCPPATH=$(append_path "${HADOOPCPPATH}" `ls ${HADOOP_HOME}/hadoop-mapred*.jar | head -1`)
- fi
- for i in "${HADOOP_HOME}/lib/"*.jar; do
- HADOOPCPPATH="${HADOOPCPPATH}:$i"
- done
- CLASSPATH=$(append_path "${CLASSPATH}" "${HADOOPCPPATH}")
-
- if [ -d "${HADOOP_HOME}/lib/native" ]; then
- JAVA_PLATFORM=`CLASSPATH=${HADOOPCPPATH} ${JAVA} org.apache.hadoop.util.PlatformName | sed -e "s/ /_/g"`
- if [ -d "${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}" ]; then
- JAVA_LIBRARY_PATH=$(append_path "${JAVA_LIBRARY_PATH}" "${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}")
- fi
+HADOOP_IN_PATH=$(PATH="${HADOOP_HOME:-${HADOOP_PREFIX}}/bin:$PATH" which hadoop 2>/dev/null)
+if [ -f ${HADOOP_IN_PATH} ]; then
+ HADOOP_JAVA_LIBRARY_PATH=$(HADOOP_CLASSPATH="$CLASSPATH" ${HADOOP_IN_PATH} \
+ org.apache.hadoop.hbase.util.GetJavaProperty java.library.path 2>/dev/null)
+ if [ -n "$HADOOP_JAVA_LIBRARY_PATH" ]; then
+ JAVA_LIBRARY_PATH=$(append_path "${JAVA_LIBRARY_PATH}" "$HADOOP_JAVA_LIBRARY_PATH")
fi
+ CLASSPATH=$(append_path "${CLASSPATH}" `${HADOOP_IN_PATH} classpath 2>/dev/null`)
+fi
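+# (Sketch of the effect, assumed layout) if `hadoop` resolves to /opt/hadoop/bin/hadoop,
+# the block above appends the output of `/opt/hadoop/bin/hadoop classpath` to CLASSPATH
+# and adds Hadoop's java.library.path (native libraries) to JAVA_LIBRARY_PATH.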
+
+# Add user-specified CLASSPATH last
+if [ "$HBASE_CLASSPATH" != "" ]; then
+ CLASSPATH=${CLASSPATH}:${HBASE_CLASSPATH}
+fi
+
+# Add user-specified CLASSPATH prefix first
+if [ "$HBASE_CLASSPATH_PREFIX" != "" ]; then
+ CLASSPATH=${HBASE_CLASSPATH_PREFIX}:${CLASSPATH}
+fi
+
+# cygwin path translation
+if $cygwin; then
+ CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+ HBASE_HOME=`cygpath -d "$HBASE_HOME"`
+ HBASE_LOG_DIR=`cygpath -d "$HBASE_LOG_DIR"`
fi
if [ -d "${HBASE_HOME}/build/native" -o -d "${HBASE_HOME}/lib/native" ]; then
@@ -265,6 +255,21 @@ fi
# restore ordinary behaviour
unset IFS
+#Set the right GC options based on the what we are running
+declare -a server_cmds=("master" "regionserver" "thrift" "thrift2" "rest" "avro" "zookeeper")
+for cmd in ${server_cmds[@]}; do
+ if [[ $cmd == $COMMAND ]]; then
+ server=true
+ break
+ fi
+done
+
+if [[ $server ]]; then
+ HBASE_OPTS="$HBASE_OPTS $SERVER_GC_OPTS"
+else
+ HBASE_OPTS="$HBASE_OPTS $CLIENT_GC_OPTS"
+fi
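+# Example (assumed hbase-env.sh values): `hbase regionserver start` matches server_cmds,
+# so SERVER_GC_OPTS (e.g. "-verbose:gc -XX:+PrintGCDetails") is appended to HBASE_OPTS;
+# a client command such as `hbase shell` gets CLIENT_GC_OPTS instead.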
+
# figure out which class to run
if [ "$COMMAND" = "shell" ] ; then
# eg export JRUBY_HOME=/usr/local/share/jruby
@@ -272,6 +277,7 @@ if [ "$COMMAND" = "shell" ] ; then
CLASSPATH="$JRUBY_HOME/lib/jruby.jar:$CLASSPATH"
HBASE_OPTS="$HBASE_OPTS -Djruby.home=$JRUBY_HOME -Djruby.lib=$JRUBY_HOME/lib"
fi
+ HBASE_OPTS="$HBASE_OPTS $HBASE_SHELL_OPTS"
CLASS="org.jruby.Main -X+O ${JRUBY_OPTS} ${HBASE_HOME}/bin/hirb.rb"
elif [ "$COMMAND" = "hbck" ] ; then
CLASS='org.apache.hadoop.hbase.util.HBaseFsck'
@@ -319,7 +325,8 @@ elif [ "$COMMAND" = "zookeeper" ] ; then
if [ "$1" != "stop" ] ; then
HBASE_OPTS="$HBASE_OPTS $HBASE_ZOOKEEPER_OPTS"
fi
-
+elif [ "$COMMAND" = "mapredcp" ] ; then
+ CLASS='org.apache.hadoop.hbase.util.MapreduceDependencyClasspathTool'
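+# Illustrative usage (assumed job jar/class names):
+#   HADOOP_CLASSPATH=$(bin/hbase mapredcp) hadoop jar my-mr-job.jar MyJobDriver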
elif [ "$COMMAND" = "classpath" ] ; then
echo $CLASSPATH
exit 0
@@ -340,6 +347,14 @@ HBASE_OPTS="$HBASE_OPTS -Dhbase.id.str=$HBASE_IDENT_STRING"
HBASE_OPTS="$HBASE_OPTS -Dhbase.root.logger=${HBASE_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
HBASE_OPTS="$HBASE_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$JAVA_LIBRARY_PATH"
+fi
+
+# Enable security logging on the master and regionserver only
+if [ "$COMMAND" = "master" ] || [ "$COMMAND" = "regionserver" ]; then
+ HBASE_OPTS="$HBASE_OPTS -Dhbase.security.logger=${HBASE_SECURITY_LOGGER:-INFO,DRFAS}"
+else
+ HBASE_OPTS="$HBASE_OPTS -Dhbase.security.logger=${HBASE_SECURITY_LOGGER:-INFO,NullAppender}"
fi
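+# (Assumption, not stated in this change) DRFAS is expected to be a security-audit appender
+# defined in conf/log4j.properties; NullAppender keeps other commands from emitting
+# security log output.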
# Exec unless HBASE_NOEXEC is set.
diff --git a/bin/hbase-config.sh b/bin/hbase-config.sh
index 63b4f058eb68..0137db9219dd 100644
--- a/bin/hbase-config.sh
+++ b/bin/hbase-config.sh
@@ -1,7 +1,5 @@
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -77,10 +75,16 @@ HBASE_REGIONSERVERS="${HBASE_REGIONSERVERS:-$HBASE_CONF_DIR/regionservers}"
HBASE_BACKUP_MASTERS="${HBASE_BACKUP_MASTERS:-$HBASE_CONF_DIR/backup-masters}"
# Source the hbase-env.sh. Will have JAVA_HOME defined.
-if [ -f "${HBASE_CONF_DIR}/hbase-env.sh" ]; then
+# HBASE-7817 - Source the hbase-env.sh only if it has not already been done. HBASE_ENV_INIT keeps track of it.
+if [ -z "$HBASE_ENV_INIT" ] && [ -f "${HBASE_CONF_DIR}/hbase-env.sh" ]; then
. "${HBASE_CONF_DIR}/hbase-env.sh"
+ export HBASE_ENV_INIT="true"
fi
+# Newer versions of glibc use an arena memory allocator that causes virtual
+# memory usage to explode. Tune the variable down to prevent vmem explosion.
+export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
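+# (Illustrative check, not part of the original change) the limit can be verified for a
+# running daemon on Linux, e.g.:
+#   tr '\0' '\n' < /proc/<hbase-pid>/environ | grep MALLOC_ARENA_MAX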
+
if [ -z "$JAVA_HOME" ]; then
for candidate in \
/usr/lib/jvm/java-6-sun \
diff --git a/bin/hbase-daemon.sh b/bin/hbase-daemon.sh
index ffae30a49dae..201548a286eb 100755
--- a/bin/hbase-daemon.sh
+++ b/bin/hbase-daemon.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -116,14 +114,26 @@ fi
JAVA=$JAVA_HOME/bin/java
export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME
export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log
-export HBASE_ROOT_LOGGER="INFO,DRFA"
-logout=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.out
+
+if [ -z "${HBASE_ROOT_LOGGER}" ]; then
+export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,DRFA"}
+fi
+
+if [ -z "${HBASE_SECURITY_LOGGER}" ]; then
+export HBASE_SECURITY_LOGGER=${HBASE_SECURITY_LOGGER:-"INFO,DRFAS"}
+fi
+
+logout=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.out
+
loggc=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc
loglog="${HBASE_LOG_DIR}/${HBASE_LOGFILE}"
pid=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
-if [ "$HBASE_USE_GC_LOGFILE" = "true" ]; then
- export HBASE_GC_OPTS=" -Xloggc:${loggc}"
+if [ -n "$SERVER_GC_OPTS" ]; then
+ export SERVER_GC_OPTS=${SERVER_GC_OPTS/"-Xloggc:"/"-Xloggc:${loggc}"}
+fi
+if [ -n "$CLIENT_GC_OPTS" ]; then
+ export CLIENT_GC_OPTS=${CLIENT_GC_OPTS/"-Xloggc:"/"-Xloggc:${loggc}"}
fi
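+# Worked example (assumed hbase-env.sh value): with SERVER_GC_OPTS="-verbose:gc -Xloggc:"
+# and loggc=/var/log/hbase/hbase-hbase-master-host1.gc, the substitution above expands it
+# to "-verbose:gc -Xloggc:/var/log/hbase/hbase-hbase-master-host1.gc".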
# Set default scheduling priority
diff --git a/bin/hbase-daemons.sh b/bin/hbase-daemons.sh
index 843eaaa74ffe..21ce635777d9 100755
--- a/bin/hbase-daemons.sh
+++ b/bin/hbase-daemons.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
diff --git a/bin/hirb.rb b/bin/hirb.rb
index 32a51b3bf2f7..c8f13b5703d4 100644
--- a/bin/hirb.rb
+++ b/bin/hirb.rb
@@ -1,6 +1,4 @@
#
-# Copyright 2009 The Apache Software Foundation
-#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
diff --git a/bin/local-master-backup.sh b/bin/local-master-backup.sh
old mode 100644
new mode 100755
index 2c0a4c02c76d..c945e2b39646
--- a/bin/local-master-backup.sh
+++ b/bin/local-master-backup.sh
@@ -1,7 +1,5 @@
-#!/bin/sh
+#!/usr/bin/env bash
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
diff --git a/bin/local-regionservers.sh b/bin/local-regionservers.sh
old mode 100644
new mode 100755
index a4d5a1d93211..29adcf396a33
--- a/bin/local-regionservers.sh
+++ b/bin/local-regionservers.sh
@@ -1,7 +1,5 @@
-#!/bin/sh
+#!/usr/bin/env bash
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
diff --git a/bin/master-backup.sh b/bin/master-backup.sh
index d20f5793e094..feca4ab86572 100755
--- a/bin/master-backup.sh
+++ b/bin/master-backup.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2010 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -25,11 +23,11 @@
# Environment Variables
#
# HBASE_BACKUP_MASTERS File naming remote hosts.
-# Default is ${HADOOP_CONF_DIR}/backup-masters
+# Default is ${HBASE_CONF_DIR}/backup-masters
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_HOME}/conf.
# HBASE_CONF_DIR Alternate hbase conf dir. Default is ${HBASE_HOME}/conf.
-# HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
-# HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
+# HBASE_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+# HBASE_SSH_OPTS Options passed to ssh when running remote commands.
#
# Modelled after $HADOOP_HOME/bin/slaves.sh.
diff --git a/bin/region_mover.rb b/bin/region_mover.rb
index 028f9a89cbe7..1ea785d4f1b3 100644
--- a/bin/region_mover.rb
+++ b/bin/region_mover.rb
@@ -1,5 +1,3 @@
-# Copyright 2011 The Apache Software Foundation
-#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -75,12 +73,22 @@ def getTable(config, name)
return $TABLES[key]
end
+def closeTables()
+ if not $TABLES
+ return
+ end
+
+ $LOG.info("Close all tables")
+ $TABLES.each do |name, table|
+ $TABLES.delete(name)
+ table.close()
+ end
+end
# Returns true if passed region is still on 'original' when we look at .META.
def isSameServer(admin, r, original)
server = getServerNameForRegion(admin, r)
- return false unless server
- return true unless original
+ return false unless server and original
return server == original
end
@@ -94,6 +102,7 @@ def abort(why, e)
# Get servername that is up in .META.; this is hostname + port + startcode comma-delimited.
# Can return nil
def getServerNameForRegion(admin, r)
+ return nil unless admin.isTableEnabled(r.getTableName)
if r.isRootRegion()
# Hack
tracker = org.apache.hadoop.hbase.zookeeper.RootRegionTracker.new(admin.getConnection().getZooKeeperWatcher(), RubyAbortable.new())
@@ -116,6 +125,7 @@ def getServerNameForRegion(admin, r)
g.addColumn(HConstants::CATALOG_FAMILY, HConstants::SERVER_QUALIFIER)
g.addColumn(HConstants::CATALOG_FAMILY, HConstants::STARTCODE_QUALIFIER)
result = table.get(g)
+ return nil unless result
server = result.getValue(HConstants::CATALOG_FAMILY, HConstants::SERVER_QUALIFIER)
startcode = result.getValue(HConstants::CATALOG_FAMILY, HConstants::STARTCODE_QUALIFIER)
return nil unless server
@@ -129,15 +139,24 @@ def isSuccessfulScan(admin, r)
scan.setBatch(1)
scan.setCaching(1)
scan.setFilter(FirstKeyOnlyFilter.new())
- table = getTable(admin.getConfiguration(), r.getTableDesc().getName())
- scanner = table.getScanner(scan)
+ begin
+ table = getTable(admin.getConfiguration(), r.getTableName())
+ scanner = table.getScanner(scan)
+ rescue org.apache.hadoop.hbase.TableNotFoundException,
+ org.apache.hadoop.hbase.TableNotEnabledException => e
+ $LOG.warn("Region " + r.getEncodedName() + " belongs to recently " +
+ "deleted/disabled table. Skipping... " + e.message)
+ return
+ end
begin
results = scanner.next()
# We might scan into next region, this might be an empty table.
# But if no exception, presume scanning is working.
ensure
scanner.close()
- table.close()
+ # Do not close the htable. It is cached in $TABLES and
+ # may be reused in moving another region of same table.
+ # table.close()
end
end
@@ -150,6 +169,7 @@ def move(admin, r, newServer, original)
retries = admin.getConfiguration.getInt("hbase.move.retries.max", 5)
count = 0
same = true
+ start = Time.now
while count < retries and same
if count > 0
$LOG.info("Retry " + count.to_s + " of maximum " + retries.to_s)
@@ -157,7 +177,8 @@ def move(admin, r, newServer, original)
count = count + 1
begin
admin.move(Bytes.toBytes(r.getEncodedName()), Bytes.toBytes(newServer))
- rescue java.lang.reflect.UndeclaredThrowableException => e
+ rescue java.lang.reflect.UndeclaredThrowableException,
+ org.apache.hadoop.hbase.UnknownRegionException => e
$LOG.info("Exception moving " + r.getEncodedName() +
"; split/moved? Continuing: " + e)
return
@@ -174,6 +195,8 @@ def move(admin, r, newServer, original)
raise RuntimeError, "Region stuck on #{original}, newserver=#{newServer}" if same
# Assert can Scan from new location.
isSuccessfulScan(admin, r)
+ $LOG.info("Moved region " + r.getRegionNameAsString() + " cost: " +
+ java.lang.String.format("%.3f", (Time.now - start)))
end
# Return the hostname portion of a servername (all up to first ',')
@@ -213,6 +236,16 @@ def stripServer(servers, hostname)
return servername
end
+# Returns a new server list, built from the passed array of servers, that excludes any
+# server whose hostname portion appears in the exclude file.
+def stripExcludes(servers, excludefile)
+ excludes = readExcludes(excludefile)
+ servers = servers.find_all{|server| !excludes.contains(getHostnameFromServerName(server)) }
+ # return updated servers list
+ return servers
+end
+
+
# Return servername that matches passed hostname
def getServerName(servers, hostname)
servername = nil
@@ -309,9 +342,15 @@ def unloadRegions(options, hostname)
# Remove the server we are unloading from from list of servers.
# Side-effect is the servername that matches this hostname
servername = stripServer(servers, hostname)
+
+ # Remove the servers in our exclude list from list of servers.
+ servers = stripExcludes(servers, options[:excludesFile])
+ puts "Valid region move targets: ", servers
movedRegions = java.util.ArrayList.new()
while true
rs = getRegions(config, servername)
+ # Remove those already tried to move
+ rs.removeAll(movedRegions)
break if rs.length == 0
count = 0
$LOG.info("Moving " + rs.length.to_s + " region(s) from " + servername +
@@ -319,8 +358,9 @@ def unloadRegions(options, hostname)
for r in rs
# Get a random server to move the region to.
server = servers[rand(servers.length)]
- $LOG.info("Moving region " + r.getEncodedName() + " (" + count.to_s +
- " of " + rs.length.to_s + ") to server=" + server);
+ $LOG.info("Moving region " + r.getRegionNameAsString() + " (" +
+ (count + 1).to_s + " of " + rs.length.to_s + ") from server=" +
+ servername + " to server=" + server);
count = count + 1
# Assert we can scan region in its current location
isSuccessfulScan(admin, r)
@@ -361,10 +401,13 @@ def loadRegions(options, hostname)
end
$LOG.info("Moving " + regions.size().to_s + " regions to " + servername)
count = 0
+ # Sleep 20s to make sure the regionserver has finished initialization.
+ sleep 20
for r in regions
exists = false
begin
- exists = isSuccessfulScan(admin, r)
+ isSuccessfulScan(admin, r)
+ exists = true
rescue org.apache.hadoop.hbase.NotServingRegionException => e
$LOG.info("Failed scan of " + e.message)
end
@@ -376,12 +419,36 @@ def loadRegions(options, hostname)
" of " + regions.length.to_s + ") already on target server=" + servername)
next
end
- $LOG.info("Moving region " + r.getEncodedName() + " (" + count.to_s +
- " of " + regions.length.to_s + ") to server=" + servername);
+ $LOG.info("Moving region " + r.getRegionNameAsString() + " (" +
+ (count + 1).to_s + " of " + regions.length.to_s + ") from server=" +
+ currentServer.to_s + " to server=" + servername.to_s);
move(admin, r, servername, currentServer)
end
end
+# Returns an array of hosts to exclude as region move targets
+def readExcludes(filename)
+ if filename == nil
+ return java.util.ArrayList.new()
+ end
+ if ! File.exist?(filename)
+ puts "Error: Unable to read host exclude file: ", filename
+ raise RuntimeError
+ end
+
+ f = File.new(filename, "r")
+ # Read excluded hosts list
+ excludes = java.util.ArrayList.new()
+ while (line = f.gets)
+ line.strip! # do an inplace drop of pre and post whitespaces
+ excludes.add(line) unless line.empty? # exclude empty lines
+ end
+ puts "Excluding hosts as region move targets: ", excludes
+ f.close
+
+ return excludes
+end
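+# Example exclude file contents (hypothetical hostnames), one host per line:
+#   rs-node-07.example.com
+#   rs-node-08.example.com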
+
def getFilename(options, targetServer)
filename = options[:file]
if not filename
@@ -408,6 +475,9 @@ def getFilename(options, targetServer)
opts.on('-d', '--debug', 'Display extra debug logging') do
options[:debug] = true
end
+ opts.on('-x', '--excludefile=FILE', 'File with hosts-per-line to exclude as unload targets; default excludes only target host; useful for rack decommissioning.') do |file|
+ options[:excludesFile] = file
+ end
end
optparse.parse!
@@ -432,3 +502,5 @@ def getFilename(options, targetServer)
puts optparse
exit 3
end
+
+closeTables()
diff --git a/bin/regionservers.sh b/bin/regionservers.sh
index 9759f2b00c41..fc96563b733a 100755
--- a/bin/regionservers.sh
+++ b/bin/regionservers.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -28,8 +26,8 @@
# Default is ${HADOOP_CONF_DIR}/regionservers
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_HOME}/conf.
# HBASE_CONF_DIR Alternate hbase conf dir. Default is ${HBASE_HOME}/conf.
-# HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
-# HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
+# HBASE_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+# HBASE_SSH_OPTS Options passed to ssh when running remote commands.
#
# Modelled after $HADOOP_HOME/bin/slaves.sh.
diff --git a/bin/replication/copy_tables_desc.rb b/bin/replication/copy_tables_desc.rb
index ed85655933fe..6d4e9c8fc452 100644
--- a/bin/replication/copy_tables_desc.rb
+++ b/bin/replication/copy_tables_desc.rb
@@ -1,6 +1,4 @@
#
-# Copyright 2010 The Apache Software Foundation
-#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -32,7 +30,6 @@
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.HTableDescriptor
import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper
# Name of this script
NAME = "copy_tables_desc"
diff --git a/bin/rolling-restart.sh b/bin/rolling-restart.sh
index 8c3cc2bf5731..362b29cd0c1c 100755
--- a/bin/rolling-restart.sh
+++ b/bin/rolling-restart.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -28,13 +26,13 @@
# Default is ${HADOOP_CONF_DIR}/regionservers
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_HOME}/conf.
# HBASE_CONF_DIR Alternate hbase conf dir. Default is ${HBASE_HOME}/conf.
-# HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
-# HADOOP_SLAVE_TIMEOUT Seconds to wait for timing out a remote command.
-# HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
+# HBASE_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+# HBASE_SLAVE_TIMEOUT Seconds to wait for timing out a remote command.
+# HBASE_SSH_OPTS Options passed to ssh when running remote commands.
#
# Modelled after $HADOOP_HOME/bin/slaves.sh.
-usage="Usage: $0 [--config ] [--rs-only] [--master-only]"
+usage="Usage: $0 [--config ] [--rs-only] [--master-only] [--graceful]"
bin=`dirname "$0"`
bin=`cd "$bin">/dev/null; pwd`
@@ -56,16 +54,24 @@ function usage() {
RR_RS=1
RR_MASTER=1
+RR_GRACEFUL=0
for x in "$@" ; do
case "$x" in
--rs-only|-r)
RR_RS=1
RR_MASTER=0
+ RR_GRACEFUL=0
;;
--master-only)
RR_RS=0
RR_MASTER=1
+ RR_GRACEFUL=0
+ ;;
+ --graceful)
+ RR_RS=0
+ RR_MASTER=0
+ RR_GRACEFUL=1
;;
*)
echo Bad argument: $x
@@ -76,7 +82,8 @@ for x in "$@" ; do
done
# quick function to get a value from the HBase config file
-distMode=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed`
+# HBASE-6504 - only take the first line of the output in case verbose gc is on
+distMode=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed | head -n 1`
if [ "$distMode" == 'false' ]; then
if [ $RR_RS -ne 1 ] || [ $RR_MASTER -ne 1 ]; then
echo Cant do selective rolling restart if not running distributed
@@ -84,6 +91,9 @@ if [ "$distMode" == 'false' ]; then
fi
"$bin"/hbase-daemon.sh restart master
else
+ zparent=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.parent`
+ if [ "$zparent" == "null" ]; then zparent="/hbase"; fi
+
if [ $RR_MASTER -eq 1 ]; then
# stop all masters before re-start to avoid races for master znode
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" stop master
@@ -91,13 +101,11 @@ else
--hosts "${HBASE_BACKUP_MASTERS}" stop master-backup
# make sure the master znode has been deleted before continuing
- zparent=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.parent`
- if [ "$zparent" == "null" ]; then zparent="/hbase"; fi
zmaster=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.master`
if [ "$zmaster" == "null" ]; then zmaster="master"; fi
zmaster=$zparent/$zmaster
echo -n "Waiting for Master ZNode ${zmaster} to expire"
- while bin/hbase zkcli stat $zmaster >/dev/null 2>&1; do
+ while ! "$bin"/hbase zkcli stat $zmaster 2>&1 | grep "Node does not exist"; do
echo -n "."
sleep 1
done
@@ -136,4 +144,20 @@ else
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
--hosts "${HBASE_REGIONSERVERS}" restart regionserver
fi
+
+ if [ $RR_GRACEFUL -eq 1 ]; then
+ # gracefully restart all online regionservers
+ zkrs=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.rs`
+ if [ "$zkrs" == "null" ]; then zkrs="rs"; fi
+ zkrs="$zparent/$zkrs"
+ online_regionservers=`$bin/hbase zkcli ls $zkrs 2>&1 | tail -1 | sed "s/\[//" | sed "s/\]//"`
+ for rs in $online_regionservers
+ do
+ rs_parts=(${rs//,/ })
+ hostname=${rs_parts[0]}
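+ # e.g. (illustrative) a znode named "rs-node-01.example.com,60020,1332994662000"
+ # yields hostname "rs-node-01.example.com", which is passed to graceful_stop.sh below.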
+ echo "Gracefully restarting: $hostname"
+ "$bin"/graceful_stop.sh --config "${HBASE_CONF_DIR}" --restart --reload --debug "$hostname"
+ sleep 1
+ done
+ fi
fi
diff --git a/bin/start-hbase.sh b/bin/start-hbase.sh
index 6240ee649abf..aed729a808bf 100755
--- a/bin/start-hbase.sh
+++ b/bin/start-hbase.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -38,12 +36,13 @@ then
exit $errCode
fi
-distMode=`$bin/hbase --config "$HBASE_CONF_DIR" org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed`
+# HBASE-6504 - only take the first line of the output in case verbose gc is on
+distMode=`$bin/hbase --config "$HBASE_CONF_DIR" org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed | head -n 1`
if [ "$distMode" == 'false' ]
then
- "$bin"/hbase-daemon.sh start master
+ "$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master
else
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start zookeeper
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master
diff --git a/bin/stop-hbase.sh b/bin/stop-hbase.sh
index b3828345d51d..5b2e69d6f21e 100755
--- a/bin/stop-hbase.sh
+++ b/bin/stop-hbase.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -60,7 +58,8 @@ done
echo
# distributed == false means that the HMaster will kill ZK when it exits
-distMode=`$bin/hbase --config "$HBASE_CONF_DIR" org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed`
+# HBASE-6504 - only take the first line of the output in case verbose gc is on
+distMode=`$bin/hbase --config "$HBASE_CONF_DIR" org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed | head -n 1`
if [ "$distMode" == 'true' ]
then
# TODO: store backup masters in ZooKeeper and have the primary send them a shutdown message
diff --git a/bin/zookeepers.sh b/bin/zookeepers.sh
index 89a214e5a809..97bf41b60528 100755
--- a/bin/zookeepers.sh
+++ b/bin/zookeepers.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2009 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
diff --git a/conf/hadoop-metrics.properties b/conf/hadoop-metrics.properties
index 046a369524da..4eb70a6911d3 100644
--- a/conf/hadoop-metrics.properties
+++ b/conf/hadoop-metrics.properties
@@ -1,3 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# See http://wiki.apache.org/hadoop/GangliaMetrics
# Make sure you know whether you are using ganglia 3.0 or 3.1.
# If 3.1, you will have to patch your hadoop instance with HADOOP-4675
@@ -7,13 +23,18 @@
# for the moment.
#
# See also http://hadoop.apache.org/hbase/docs/current/metrics.html
+# GMETADHOST_IP is the hostname (or) IP address of the server on which the ganglia
+# meta daemon (gmetad) service is running
-# Configuration of the "hbase" context for null
-hbase.class=org.apache.hadoop.metrics.spi.NullContext
+# Configuration of the "hbase" context for NullContextWithUpdateThread
+# NullContextWithUpdateThread is a null context which has a thread calling
+# periodically when monitoring is started. This keeps the data sampled
+# correctly.
+hbase.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+hbase.period=10
# Configuration of the "hbase" context for file
# hbase.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
-# hbase.period=10
# hbase.fileName=/tmp/metrics_hbase.log
# HBase-specific configuration to reset long-running stats (e.g. compactions)
@@ -28,11 +49,11 @@ hbase.extendedperiod = 3600
# hbase.servers=GMETADHOST_IP:8649
# Configuration of the "jvm" context for null
-jvm.class=org.apache.hadoop.metrics.spi.NullContext
+jvm.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+jvm.period=10
# Configuration of the "jvm" context for file
# jvm.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
-# jvm.period=10
# jvm.fileName=/tmp/metrics_jvm.log
# Configuration of the "jvm" context for ganglia
@@ -43,11 +64,11 @@ jvm.class=org.apache.hadoop.metrics.spi.NullContext
# jvm.servers=GMETADHOST_IP:8649
# Configuration of the "rpc" context for null
-rpc.class=org.apache.hadoop.metrics.spi.NullContext
+rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+rpc.period=10
# Configuration of the "rpc" context for file
# rpc.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
-# rpc.period=10
# rpc.fileName=/tmp/metrics_rpc.log
# Configuration of the "rpc" context for ganglia
@@ -56,3 +77,10 @@ rpc.class=org.apache.hadoop.metrics.spi.NullContext
# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
# rpc.period=10
# rpc.servers=GMETADHOST_IP:8649
+
+# Configuration of the "rest" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# rest.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# rest.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# rest.period=10
+# rest.servers=GMETADHOST_IP:8649
diff --git a/conf/hbase-env.sh b/conf/hbase-env.sh
index bc293b4b52f0..39f3b74e54bb 100644
--- a/conf/hbase-env.sh
+++ b/conf/hbase-env.sh
@@ -1,7 +1,5 @@
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -21,6 +19,10 @@
# Set environment variables here.
+# This script sets variables multiple times over the course of starting an hbase process,
+# so try to keep things idempotent unless you want to take an even deeper look
+# into the startup scripts (bin/hbase, etc.)
+
# The java implementation to use. Java 1.6 required.
# export JAVA_HOME=/usr/java/jdk1.6.0/
@@ -36,12 +38,31 @@
# see http://wiki.apache.org/hadoop/PerformanceTuning
export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
-# Uncomment below to enable java garbage collection logging in the .out file.
-# export HBASE_OPTS="$HBASE_OPTS -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps $HBASE_GC_OPTS"
+# Uncomment one of the below three options to enable java garbage collection logging for the server-side processes.
+
+# This enables basic gc logging to the .out file.
+# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
+
+# This enables basic gc logging to its own file.
+# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
+# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:"
-# Uncomment below (along with above GC logging) to put GC information in its own logfile (will set HBASE_GC_OPTS)
-# export HBASE_USE_GC_LOGFILE=true
+# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
+# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
+# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
+# Uncomment one of the below three options to enable java garbage collection logging for the client processes.
+
+# This enables basic gc logging to the .out file.
+# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
+
+# This enables basic gc logging to its own file.
+# If <FILE-PATH> is not replaced, the log file (.gc) will still be generated in HBASE_LOG_DIR.
+# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"
+
+# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
+# If <FILE-PATH> is not replaced, the log file (.gc) will still be generated in HBASE_LOG_DIR.
+# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
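+
+# As a purely illustrative sketch (the log path below is an assumption, not a shipped default),
+# replacing <FILE-PATH> with a concrete location for the rolling server-side GC log could look like:
+# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/tmp/hbase-server-gc.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"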
# Uncomment below if you intend to use the EXPERIMENTAL off heap cache.
# export HBASE_OPTS="$HBASE_OPTS -XX:MaxDirectMemorySize="
@@ -61,6 +82,9 @@ export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
# File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default.
# export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers
+# File naming hosts on which backup HMaster will run. $HBASE_HOME/conf/backup-masters by default.
+# export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters
+
# Extra ssh options. Empty by default.
# export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR"
diff --git a/conf/hbase-site.xml b/conf/hbase-site.xml
index af4c30095216..3ecd24c2cf4d 100644
--- a/conf/hbase-site.xml
+++ b/conf/hbase-site.xml
@@ -2,8 +2,6 @@
+
+
+
+
+Creates a report in the directory "hbase_jdiff_report-p-PREVIOUS_BRANCH-c-CURRENT_BRANCH" of the default jdiff report folder.
+This defaults to /tmp/jdiff but can optionally be overridden by exporting JDIFF_WORKING_DIRECTORY.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dev-support/hbase_jdiff_afterSingularityTemplate.xml b/dev-support/hbase_jdiff_afterSingularityTemplate.xml
new file mode 100644
index 000000000000..6c4cd9355202
--- /dev/null
+++ b/dev-support/hbase_jdiff_afterSingularityTemplate.xml
@@ -0,0 +1,66 @@
+
+
+
+
+
+
+Creates a report in the directory "hbase_jdiff_report-p-PREVIOUS_BRANCH-c-CURRENT_BRANCH" of the default jdiff report folder.
+This defaults to /tmp/jdiff but can optionally be overridden by exporting JDIFF_WORKING_DIRECTORY.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dev-support/hbase_jdiff_template.xml b/dev-support/hbase_jdiff_template.xml
new file mode 100644
index 000000000000..21fe8ed1b299
--- /dev/null
+++ b/dev-support/hbase_jdiff_template.xml
@@ -0,0 +1,53 @@
+
+
+
+
+
+
+Creates a report in the directory "hbase_jdiff_report-p-PREVIOUS_BRANCH-c-CURRENT_BRANCH" of the default jdiff report folder.
+This defaults to /tmp/jdiff but can optionally be overridden by exporting JDIFF_WORKING_DIRECTORY.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dev-support/jdiffHBasePublicAPI.sh b/dev-support/jdiffHBasePublicAPI.sh
new file mode 100644
index 000000000000..2000d2abd945
--- /dev/null
+++ b/dev-support/jdiffHBasePublicAPI.sh
@@ -0,0 +1,249 @@
+#!/bin/bash
+set -e
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+################################################ ABOUT JDIFF #######################################################
+#
+# What is JDiff? JDiff is a tool for comparing the public APIs of two separate Java codebases. Like diff, it will
+# give additions, changes, and removals. It will output an HTML report with the information.
+# To learn more, visit http://javadiff.sourceforge.net/.
+# JDiff is licensed under LGPL.
+
+############################################# QUICK-START EXAMPLE ##################################################
+#
+# Suppose we wanted to see the API diffs between HBase 0.92 and HBase 0.94. We could use this tool like so:
+# > ./jdiffHBasePublicAPI.sh https://github.com/apache/hbase.git 0.92 https://github.com/apache/hbase.git 0.94
+#
+# This would generate a report in the local folder /tmp/jdiff/hbase_jdiff_report-p-0.92-c-0.94/
+# To view the report, simply examine /tmp/jdiff/hbase_jdiff_report-p-0.92-c-0.94/changes.html in your choice of
+# browser.
+#
+# Note that this works because 0.92 and 0.94 have the source directory structure that is specified in the
+# hbase_jdiff_template.xml file. To compare 0.95 to 0.96, which have the post-singularity structure, two other
+# template files (included) are used. The format detection is handled automatically by the script.
+#
+# On a local machine, JDiff reports have taken ~20-30 minutes to run. On Jenkins, it has taken over 35 minutes
+# in some cases. Your mileage may vary. Trunk and 0.95 take more time than 0.92 and 0.94.
+#
+#
+############################################ SPECIFYING A LOCAL REPO ###############################################
+#
+# The JDiff tool also works with local code. Instead of specifying a repo and a branch, you can specify the
+# absolute path of the ./hbase folder and a name for the code (e.g. experimental_94).
+#
+# A local repo can be specified for none, one, or both of the sources.
+#
+############################################### EXAMPLE USE CASES ##################################################
+#
+# Example 1: Generate a report to check that a potential change doesn't break API compatibility with Apache HBase 0.94
+#
+# In this case, you could compare the version you are using against a repo branch where your changes are.
+# > ./jdiffHBasePublicAPI.sh https://github.com/apache/hbase.git 0.94 https://github.com/MY_REPO/hbase.git 0.94
+#
+# Example 2: Generate a report to check if two branches of the same repo have any public API incompatibilities
+# > ./jdiffHBasePublicAPI.sh https://github.com/MY_REPO/hbase.git $BRANCH_1 \
+# > https://github.com/MY_REPO/hbase.git $BRANCH_2
+#
+# Example 3: Have Example 1 done in a special directory in the user's home folder
+#
+# > export JDIFF_WORKING_DIRECTORY=~/jdiff_reports
+# > ./jdiffHBasePublicAPI.sh https://github.com/apache/hbase.git 0.94 https://github.com/MY_REPO/hbase.git 0.94
+#
+# Example 4: Check the API diff of a local change against an existing repo branch.
+# > ./jdiffHBasePublicAPI.sh https://github.com/apache/hbase.git 0.95 /home/aleks/exp_hbase/hbase experiment_95
+#
+# Example 5: Compare two local repos for public API changes
+# > ./jdiffHBasePublicAPI.sh /home/aleks/stable_hbase/hbase stable_95 /home/aleks/exp_hbase/hbase experiment_95
+#
+#
+################################################## NOTE ON USAGE ###################################################
+#
+# 1. When using this tool, please specify the initial version first and the current version second. The semantics
+# do not make sense otherwise. For example: jdiff 94 95 is good. jdiff 95 94 is bad
+#
+############################################# READING A JDIFF REPORT ###############################################
+#
+# The purpose of the JDiff report is to show things that have changed between two versions of the public API. A user
+# would use this report to determine if committing a change would cause existing API clients to break. To do so,
+# there are specific things that one should look for in the report.
+#
+# 1. Identify the classes that constitute the public API. An example in 0.94 might be all classes in
+# org.apache.hadoop.hbase.client.*
+# 2. After identifying those classes, go through each one and look for offending changes.
+# Those may include, but are not limited to:
+# 1. Removed methods
+# 2. Changed methods (including changes in return type and exception types)
+# 3. Methods added to interfaces
+# 4. Changed class inheritance information (may be innocuous but is definitely worth validating)
+# 5. Removed or renamed public static member variables and constants
+# 6. Removed or renamed packages
+# 7. Class moved to a different package
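+#
+# As an illustration (the method below is only a hypothetical example of the kind of signature to watch),
+# removing or changing a public method such as
+#   public Result get(Get get) throws IOException
+# on a class in org.apache.hadoop.hbase.client would show up in the report as a removed or changed
+# method and should be treated as an incompatible change.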
+
+########################################### SETTING THE JDIFF WORKING DIRECTORY ####################################
+#
+# By default, the working environment of jdiff is /tmp/jdiff. However, sometimes it is nice to have it place reports
+# and temp files elsewhere. In that case, please export JDIFF_WORKING_DIRECTORY into the bash environment and this
+# script will pick that up and use it.
+#
+
+scriptDirectory=$(dirname ${BASH_SOURCE[0]})
+x=`echo $scriptDirectory | sed "s{\.{{g"`
+DEV_SUPPORT_HOME="`pwd`$x"
+. $scriptDirectory/jdiffHBasePublicAPI_common.sh
+
+EXPECTED_ARGS=4
+
+if [[ "$#" -ne "$EXPECTED_ARGS" ]]; then
+ echo "This tool expects $EXPECTED_ARGS arguments, but received $#. Please check your command and try again.";
+ echo "Usage: $0 "
+ exit 1;
+fi
+
+echo "JDiff evaluation beginning:";
+isGitRepo $1
+FIRST_SOURCE_TYPE=$INPUT_FORMAT;
+isGitRepo $3
+SECOND_SOURCE_TYPE=$INPUT_FORMAT;
+
+PREVIOUS_BRANCH=$2 ## We will still call it a branch even if it's not from a git repo.
+CURRENT_BRANCH=$4
+
+echo "We are going to compare source 1 which is a $FIRST_SOURCE_TYPE and source 2, which is a $SECOND_SOURCE_TYPE"
+
+
+# Check that if either source is from a git repo, that the name is reasonable.
+if [[ "$FIRST_SOURCE_TYPE" = "git_repo" ]]; then
+
+ git check-ref-format --branch $2
+fi
+
+if [[ "$SECOND_SOURCE_TYPE" = "git_repo" ]]; then
+
+ git check-ref-format --branch $4
+fi
+
+#If the JDIFF_WORKING_DIRECTORY is set, then we will output the report there. Otherwise, to the default location
+if [[ "$JDIFF_WORKING_DIRECTORY" = "" ]]; then
+
+ JDIFF_WORKING_DIRECTORY=/tmp/jdiff
+ echo "JDIFF_WORKING_DIRECTORY not set. That's not an issue. We will default it to $JDIFF_WORKING_DIRECTORY."
+else
+ echo "JDIFF_WORKING_DIRECTORY set to $JDIFF_WORKING_DIRECTORY";
+fi
+mkdir -p $JDIFF_WORKING_DIRECTORY
+
+# We will need this to reference the template we want to use
+cd $JDIFF_WORKING_DIRECTORY
+scenario_template_name=hbase_jdiff_p-$PREVIOUS_BRANCH-c-$CURRENT_BRANCH.xml
+
+
+# Pull down JDiff tool and unpack it
+if [ ! -d jdiff-1.1.1-with-incompatible-option ]; then
+ curl -O http://cloud.github.com/downloads/tomwhite/jdiff/jdiff-1.1.1-with-incompatible-option.zip
+ unzip jdiff-1.1.1-with-incompatible-option.zip
+fi
+
+JDIFF_HOME=`pwd`/jdiff-1.1.1-with-incompatible-option
+cd $JDIFF_WORKING_DIRECTORY
+
+# Pull down sources if necessary. Note that references to the previous version are prefixed with p- in order to avoid collisions between branch names
+if [[ "$FIRST_SOURCE_TYPE" = "git_repo" ]]; then
+
+ PREVIOUS_REPO=$1
+ rm -rf p-$PREVIOUS_BRANCH
+ mkdir -p p-$PREVIOUS_BRANCH
+ cd p-$PREVIOUS_BRANCH
+ git clone --depth 1 $PREVIOUS_REPO && cd hbase && git checkout origin/$PREVIOUS_BRANCH
+ cd $JDIFF_WORKING_DIRECTORY
+ HBASE_1_HOME=`pwd`/p-$PREVIOUS_BRANCH/hbase
+else
+ HBASE_1_HOME=$1
+fi
+
+echo "HBASE_1_HOME set to $HBASE_1_HOME"
+echo "In HBASE_1_HOME, we have"
+ls -la $HBASE_1_HOME
+
+if [[ "$SECOND_SOURCE_TYPE" = "git_repo" ]]; then
+ CURRENT_REPO=$3
+ rm -rf $JDIFF_WORKING_DIRECTORY/c-$CURRENT_BRANCH
+ mkdir -p $JDIFF_WORKING_DIRECTORY/c-$CURRENT_BRANCH
+ cd $JDIFF_WORKING_DIRECTORY/c-$CURRENT_BRANCH
+ git clone --depth 1 $CURRENT_REPO && cd hbase && git checkout origin/$CURRENT_BRANCH
+ cd $JDIFF_WORKING_DIRECTORY
+ HBASE_2_HOME=`pwd`/c-$CURRENT_BRANCH/hbase
+else
+ HBASE_2_HOME=$3
+fi
+
+echo "HBASE_2_HOME set to $HBASE_2_HOME"
+echo "In HBASE_2_HOME, we have"
+ls -la $HBASE_2_HOME
+
+# Next step is to pull down the proper template based on the directory structure
+isNewFormat $HBASE_1_HOME
+export P_FORMAT=$BRANCH_FORMAT
+
+isNewFormat $HBASE_2_HOME
+export C_FORMAT=$BRANCH_FORMAT
+
+if [[ "$C_FORMAT" = "new" ]]; then
+
+ if [[ "$P_FORMAT" = "new" ]]; then
+ templateFile=$DEV_SUPPORT_HOME/hbase_jdiff_afterSingularityTemplate.xml
+ echo "Previous format is of the new style. We'll be using template $templateFile";
+ else
+ templateFile=$DEV_SUPPORT_HOME/hbase_jdiff_acrossSingularityTemplate.xml
+ echo "Previous format is of the old style. We'll be using template $templateFile";
+ fi
+
+else
+
+ if [[ "P_FORMAT" != "old" ]]; then
+ echo "When using this tool, please specify the initial version first and the current version second. They should be in ascending chronological order.
+ The semantics do not make sense otherwise. For example: jdiff 94 95 is good. jdiff 95 94 is bad."
+ echo "Exiting the script."
+ exit 5;
+ fi
+ templateFile=$DEV_SUPPORT_HOME/hbase_jdiff_template.xml
+ echo "Both formats are using the 94 and earlier style directory format. We'll be using template $templateFile"
+fi
+
+cp $templateFile $JDIFF_WORKING_DIRECTORY/$scenario_template_name
+
+### Configure the jdiff script
+
+### Note that PREVIOUS_BRANCH and CURRENT_BRANCH will be the absolute locations of the source.
+echo "Configuring the jdiff script"
+sed -i "s]hbase_jdiff_report]hbase_jdiff_report-p-$PREVIOUS_BRANCH-c-$CURRENT_BRANCH]g" $JDIFF_WORKING_DIRECTORY/$scenario_template_name
+sed -i "s]JDIFF_HOME_NAME]$JDIFF_HOME]g" $JDIFF_WORKING_DIRECTORY/$scenario_template_name
+sed -i "s]OLD_BRANCH_NAME]$HBASE_1_HOME]g" $JDIFF_WORKING_DIRECTORY/$scenario_template_name
+sed -i "s]NEW_BRANCH_NAME]$HBASE_2_HOME]g" $JDIFF_WORKING_DIRECTORY/$scenario_template_name
+
+sed -i "s]V1]$PREVIOUS_BRANCH]g" $JDIFF_WORKING_DIRECTORY/$scenario_template_name
+sed -i "s]V2]$CURRENT_BRANCH]g" $JDIFF_WORKING_DIRECTORY/$scenario_template_name
+
+sed -i "s]JDIFF_FOLDER]$JDIFF_WORKING_DIRECTORY]g" $JDIFF_WORKING_DIRECTORY/$scenario_template_name
+
+echo "Running jdiff";
+ls -la $JDIFF_WORKING_DIRECTORY;
+ant -f $JDIFF_WORKING_DIRECTORY/$scenario_template_name;
+
+echo "jdiff operation complete. Report placed into $JDIFF_WORKING_DIRECTORY/hbase_jdiff_report-p-$PREVIOUS_BRANCH-c-$CURRENT_BRANCH/changes.html";
+
diff --git a/dev-support/jdiffHBasePublicAPI_common.sh b/dev-support/jdiffHBasePublicAPI_common.sh
new file mode 100644
index 000000000000..1cc99549b585
--- /dev/null
+++ b/dev-support/jdiffHBasePublicAPI_common.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+##########################################################################################################################
+#
+### Purpose: To describe whether the directory specified has the old directory format or the new directory format
+### Usage: This function takes one argument: The directory in question
+### It will set the temporary variable BRANCH_FORMAT. This variable can change with every call, so it is up to the user to
+### store it into something else as soon as the function exits
+### Example:
+### > isNewFormat ./myDevDir/testing/branch/hbase
+isNewFormat() {
+
+ echo "Determining if directory $1 is of the 0.94 and before OR 0.95 and after versions";
+ if [[ "$1" = "" ]]; then
+ echo "Directory not specified. Exiting";
+ fi
+ echo "First, check that $1 exists";
+ if [[ -d $1 ]]; then
+ echo "Directory $1 exists"
+ else
+ echo "Directory $1 does not exist. Exiting";
+ exit 1;
+ fi
+
+ if [[ -d "$1/hbase-server" ]]; then
+
+ echo "The directory $1/hbase-server exists so this is of the new format";
+ export BRANCH_FORMAT=new;
+
+ else
+ echo "The directory $1/hbase-server does not exist. Therefore, this is of the old format";
+ export BRANCH_FORMAT=old;
+ fi
+}
+
+### Purpose: To describe whether the argument specified is a git repo or a local directory
+### Usage: This function takes one argument: The directory in question
+### It will set the temporary variable INPUT_FORMAT. This variable can change with every call, so it is up to the user to
+### store it into something else as soon as the function exits
+### Example:
+### > isGitRepo ./myDevDir/testing/branch/hbase
+
+isGitRepo() {
+
+ echo "Determining if this is a local directory or a git repo.";
+ if [[ "$1" = "" ]]; then
+ echo "No value specified for repo or directory. Exiting."
+ exit 1;
+ fi
+
+ if [[ `echo $1 | grep 'http://'` || `echo $1 | grep 'https://'` || `echo $1 | grep 'git://'` ]]; then
+ echo "Looks like $1 is a git repo";
+ export INPUT_FORMAT=git_repo
+ else
+ echo "$1 is a local directory";
+ export INPUT_FORMAT=local_directory
+ fi
+
+
+}
diff --git a/dev-support/smart-apply-patch.sh b/dev-support/smart-apply-patch.sh
new file mode 100755
index 000000000000..9200e3ba921c
--- /dev/null
+++ b/dev-support/smart-apply-patch.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+PATCH_FILE=$1
+if [ -z "$PATCH_FILE" ]; then
+ echo usage: $0 patch-file
+ exit 1
+fi
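+
+# Example invocation (the patch file name is hypothetical):
+#   ./smart-apply-patch.sh HBASE-12345-v1.patch
+# or, reading the patch from stdin:
+#   cat HBASE-12345-v1.patch | ./smart-apply-patch.sh -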
+
+PATCH=${PATCH:-patch} # allow overriding patch binary
+
+# Cleanup handler for temporary files
+TOCLEAN=""
+cleanup() {
+ rm $TOCLEAN
+ exit $1
+}
+trap "cleanup 1" HUP INT QUIT TERM
+
+# Allow passing "-" for stdin patches
+if [ "$PATCH_FILE" == "-" ]; then
+ PATCH_FILE=/tmp/tmp.in.$$
+ cat /dev/fd/0 > $PATCH_FILE
+ TOCLEAN="$TOCLEAN $PATCH_FILE"
+fi
+
+# Come up with a list of changed files into $TMP
+TMP=/tmp/tmp.paths.$$
+TOCLEAN="$TOCLEAN $TMP"
+
+if $PATCH -p0 -E --dry-run < $PATCH_FILE 2>&1 > $TMP; then
+ PLEVEL=0
+ #if the patch applied at P0 there is the possibility that all we are doing
+ # is adding new files and they would apply anywhere. So try to guess the
+ # correct place to put those files.
+
+# NOTE 2014/07/17:
+# Temporarily disabling the below check since our jenkins boxes seem not to be defaulting to bash
+# causing below checks to fail. Once it is fixed, we can revert the commit and enable this again.
+
+# TMP2=/tmp/tmp.paths.2.$$
+# TOCLEAN="$TOCLEAN $TMP2"
+#
+# grep '^patching file ' $TMP | awk '{print $3}' | grep -v /dev/null | sort | uniq > $TMP2
+#
+# #first off check that all of the files do not exist
+# FOUND_ANY=0
+# for CHECK_FILE in $(cat $TMP2)
+# do
+# if [[ -f $CHECK_FILE ]]; then
+# FOUND_ANY=1
+# fi
+# done
+#
+# if [[ "$FOUND_ANY" = "0" ]]; then
+# #all of the files are new files so we have to guess where the correct place to put it is.
+#
+# # if all of the lines start with a/ or b/, then this is a git patch that
+# # was generated without --no-prefix
+# if ! grep -qv '^a/\|^b/' $TMP2 ; then
+# echo Looks like this is a git patch. Stripping a/ and b/ prefixes
+# echo and incrementing PLEVEL
+# PLEVEL=$[$PLEVEL + 1]
+# sed -i -e 's,^[ab]/,,' $TMP2
+# fi
+# fi
+elif $PATCH -p1 -E --dry-run < $PATCH_FILE 2>&1 > /dev/null; then
+ PLEVEL=1
+elif $PATCH -p2 -E --dry-run < $PATCH_FILE 2>&1 > /dev/null; then
+ PLEVEL=2
+else
+ echo "The patch does not appear to apply with p0 to p2";
+ cleanup 1;
+fi
+
+echo Going to apply patch with: $PATCH -p$PLEVEL
+$PATCH -p$PLEVEL -E < $PATCH_FILE
+
+cleanup $?
diff --git a/dev-support/test-patch.properties b/dev-support/test-patch.properties
index 6c3f5e37aaad..afe21d455dad 100644
--- a/dev-support/test-patch.properties
+++ b/dev-support/test-patch.properties
@@ -19,5 +19,5 @@ MAVEN_OPTS="-Xmx3g"
# Please update the per-module test-patch.properties if you update this file.
OK_RELEASEAUDIT_WARNINGS=84
-OK_FINDBUGS_WARNINGS=607
+OK_FINDBUGS_WARNINGS=768
OK_JAVADOC_WARNINGS=169
diff --git a/dev-support/test-patch.sh b/dev-support/test-patch.sh
old mode 100644
new mode 100755
index efa1fc795ed6..3c728454d035
--- a/dev-support/test-patch.sh
+++ b/dev-support/test-patch.sh
@@ -1,15 +1,20 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
#set -x
@@ -366,7 +371,7 @@ checkJavadocWarnings () {
echo "There appear to be $javadocWarnings javadoc warnings generated by the patched build."
### if current warnings greater than OK_JAVADOC_WARNINGS
- if [[ $javadocWarnings > $OK_JAVADOC_WARNINGS ]] ; then
+ if [[ $javadocWarnings -gt $OK_JAVADOC_WARNINGS ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 javadoc. The javadoc tool appears to have generated `expr $(($javadocWarnings-$OK_JAVADOC_WARNINGS))` warning messages."
@@ -545,7 +550,7 @@ $JIRA_COMMENT_FOOTER"
done
### if current warnings greater than OK_FINDBUGS_WARNINGS
- if [[ $findbugsWarnings > $OK_FINDBUGS_WARNINGS ]] ; then
+ if [[ $findbugsWarnings -gt $OK_FINDBUGS_WARNINGS ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 findbugs. The patch appears to introduce `expr $(($findbugsWarnings-$OK_FINDBUGS_WARNINGS))` new Findbugs (version ${findbugs_version}) warnings."
diff --git a/dev-support/test-util.sh b/dev-support/test-util.sh
index c37ab485acb9..9219bb96606c 100755
--- a/dev-support/test-util.sh
+++ b/dev-support/test-util.sh
@@ -1,8 +1,6 @@
#!/usr/bin/env bash
#
#/**
-# * Copyright 2007 The Apache Software Foundation
-# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
@@ -39,9 +37,13 @@ options:
-n N Run each test N times. Default = 1.
-s N Print N slowest tests
-H Print which tests are hanging (if any)
+ -e Echo the maven call before running. Default: not enabled
+ -r Runs remotely, on the build server. Default: not enabled
EOF
}
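+
+# Example (hypothetical invocation): echo each maven command before it runs and
+# execute the tests listed in tests.txt remotely on the build server:
+#   ./test-util.sh -e -r -f tests.txt
+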
+echoUsage=0
+server=0
testFile=
doClean=""
testType=verify
@@ -59,7 +61,7 @@ else
fi
testDir=$scriptDir/../../../target/surefire-reports
-while getopts "hcHun:s:f:" OPTION
+while getopts "hcerHun:s:f:" OPTION
do
case $OPTION in
h)
@@ -84,6 +86,12 @@ do
f)
testFile=$OPTARG
;;
+ e)
+ echoUsage=1
+ ;;
+ r)
+ server=1
+ ;;
?)
usage
exit 1
@@ -124,15 +132,44 @@ do
#Now loop through each test
for (( j = 0; j < $numTests; j++ ))
do
- nice -10 mvn $doClean $testType -Dtest=${test[$j]}
- if [ $? -ne 0 ]; then
+ # Create the general command
+ cmd="nice -10 mvn $doClean $testType -Dtest=${test[$j]}"
+
+ # Add that it should run locally, if not on the server
+ if [ ${server} -eq 0 ]; then
+ cmd="${cmd} -P localTests"
+ fi
+
+ # Print the command, if we should
+ if [ ${echoUsage} -eq 1 ]; then
+ echo "${cmd}"
+ fi
+
+ # Run the command
+ $cmd
+
+ if [ $? -ne 0 ]; then
echo "${test[$j]} failed, iteration: $i"
exit 1
fi
done
else
echo "EXECUTING ALL TESTS"
- nice -10 mvn $doClean $testType
+ # Create the general command
+ cmd="nice -10 mvn $doClean $testType"
+
+ # Add that it should run locally, if not on the server
+ if [ ${server} -eq 0 ]; then
+ cmd="${cmd} -P localTests"
+ fi
+
+ # Print the command, if we should
+ if [ ${echoUsage} -eq 1 ]; then
+ echo "${cmd}"
+ fi
+
+ #now run the command
+ $cmd
fi
done
diff --git a/pom.xml b/pom.xml
index 4460d5d22461..ccbcfbbea81d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,4 +1,4 @@
-
+
+
+ org.apache.maven.plugins
+ maven-remote-resources-plugin
+ 1.5
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 2.3
+
+
+ org.apache.maven.plugins
+ maven-release-plugin
+
+
+
+ apache-release
+
+ -Dmaven.test.skip.exec
+
+ maven-compiler-plugin
+ 2.5.1${compileSource}truefalse
+ -Xlint:-options
@@ -349,15 +374,15 @@
- 900
+ ${surefire.timeout}-enableassertions -Xmx1900m -Djava.security.egd=file:/dev/./urandom
- true
+ ${test.output.tofile}org.apache.maven.pluginsmaven-site-plugin
- 2.0.1
+ 3.3org.apache.maven.plugins
@@ -375,14 +400,14 @@
${integrationtest.include}
- ${unittest.include}
+ ${unittest.include}**/*$*
- ${test.exclude.pattern}
- true
+ ${test.output.tofile}${env.LD_LIBRARY_PATH}:${project.build.directory}/nativelib${env.DYLD_LIBRARY_PATH}:${project.build.directory}/nativelib
+ 4
@@ -421,14 +446,19 @@
avro-maven-plugin${avro.version}
+
+ org.codehaus.mojo
+ buildnumber-maven-plugin
+ 1.3
+ org.codehaus.mojobuild-helper-maven-plugin1.5
-
org.eclipse.m2e
@@ -450,7 +480,7 @@
-
+
org.apache.maven.plugins
@@ -477,30 +507,71 @@
+
+
+ org.apache.maven.plugins
+ maven-remote-resources-plugin
+ [1.5,)
+
+ process
+
+
+
+
+
+
+
+
+ org.codehaus.mojo
+ buildnumber-maven-plugin
+ [1.3,)
+
+ create-timestamp
+
+
+
+
+ true
+ true
+
+
+ org.apache.rat
- apache-rat-plugin
+ apache-rat-plugin0.8
+ **/*.log**/.*
+ **/*.tgz
+ **/*.orig
+ test/**
+ **/8e8ab58dcf39412da19833fcd8f687ac**/.git/****/target/****/CHANGES.txt**/generated/**
- **/conf/*
+
+ conf/regionservers**/*.avpr
- **/*.svg
+ **/*.svg**/*.vm**/control**/conffiledocs/***/src/site/resources/css/freebsd_docbook.css
+
+ **/src/main/resources/META-INF/LEGAL
+
+ .git/**
+ .svn/**
+ **/patchprocess/**
@@ -515,6 +586,16 @@
hbase-default.xml
+
${project.build.directory}
@@ -529,9 +610,26 @@
hbase-site.xml
+
+
+ maven-site-plugin
+
+ UTF-8
+ UTF-8
+ src/site/site.vm
+
+ org.apache.avroavro-maven-plugin
@@ -549,6 +647,52 @@
${project.build.directory}/generated-sources/java
+
+ org.apache.maven.plugins
+ maven-enforcer-plugin
+ 1.0.1
+
+
+ org.codehaus.mojo
+ extra-enforcer-rules
+ ${extra.enforcer.version}
+
+
+
+
+
+
+
+ [${maven.min.version},)
+ Maven is out of date.
+ HBase requires at least version ${maven.min.version} of Maven to properly build from source.
+ You appear to be using an older version. You can use either "mvn -version" or
+ "mvn enforcer:display-info" to verify what version is active.
+ See the reference guide on building for more information: http://hbase.apache.org/book.html#build
+
+
+
+
+ [${java.min.version},)
+ Java is out of date.
+ HBase requires at least version ${java.min.version} of the JDK to properly build from source.
+ You appear to be using an older version. You can use either "mvn -version" or
+ "mvn enforcer:display-info" to verify what version is active.
+ See the reference guide on building for more information: http://hbase.apache.org/book.html#build
+
+
+
+
+
+
+ enforce
+
+ enforce
+
+
+
+
+
org.codehaus.mojoxml-maven-plugin
@@ -590,7 +734,7 @@
100truetrue
- ${basedir}/target/site/book/
+ ${basedir}/target/site/../css/freebsd_docbook.csssrc/docbkx/customization.xsl../images/
@@ -689,6 +833,16 @@
jar-no-fork
+
@@ -729,6 +883,12 @@
falsealways
+
+ 1800
+ -enableassertions -Xmx1900m
+ -Djava.security.egd=file:/dev/./urandom -Djava.net.preferIPv4Stack=true
+ false
@@ -802,6 +962,18 @@
package="org.apache.hadoop.hbase.generated.regionserver"
webxml="${build.webapps}/regionserver/WEB-INF/web.xml"/>
+
+
+
+
+
+
@@ -839,8 +1011,14 @@
- if [ `ls ${project.build.directory}/nativelib | wc -l` -ne 0 ]; then
- cp -PR ${project.build.directory}/nativelib/lib* ${project.build.directory}/${project.build.finalName}/${project.build.finalName}/lib/native/${build.platform}
+ which cygpath 2> /dev/null
+ if [ $? = 1 ]; then
+ BUILD_DIR="${project.build.directory}"
+ else
+ BUILD_DIR=`cygpath --unix '${project.build.directory}'`
+ fi
+ if [ `ls $BUILD_DIR/nativelib | wc -l` -ne 0 ]; then
+ cp -PR $BUILD_DIR/nativelib/lib* $BUILD_DIR/${project.build.finalName}/${project.build.finalName}/lib/native/${build.platform}
fi
@@ -848,11 +1026,19 @@
-
-
-
-
+
+ which cygpath 2> /dev/null
+ if [ $? = 1 ]; then
+ BUILD_DIR="${project.build.directory}"
+ else
+ BUILD_DIR=`cygpath --unix '${project.build.directory}'`
+ fi
+
+ cd $BUILD_DIR/${project.build.finalName}
+ tar czf $BUILD_DIR/${project.build.finalName}.tar.gz ${project.build.finalName}
+
+
+
@@ -868,6 +1054,28 @@
build-helper-maven-plugin1.5
+
+ add-source
+
+ add-source
+
+
+
+
+
+
+
+
+ add-test-source
+
+ add-test-source
+
+
+
+
+
+
+ jspcSource-packageInfo-Avro-sourcegenerate-sources
@@ -925,45 +1133,172 @@
+
+ org.codehaus.mojo
+ buildnumber-maven-plugin
+
+
+ validate
+
+ create-timestamp
+
+
+
+
+ yyyy
+ build.year
+
+
+
+ org.apache.maven.plugins
+ maven-remote-resources-plugin
+
+
+
+ build-legal-for-assembly
+
+
+ process-sources
+
+ process
+
+
+ src/assembly
+ ${project.build.directory}/maven-shared-archive-resources-for-assembly
+
+ src/assembly/resource
+
+
+ src/assembly/resources/supplemental-models.xml
+
+
+ true
+ ${license.debug.print.included}
+
+
+ org.apache:apache-jar-resource-bundle:1.4
+
+ false
+ false
+ false
+
+
+
+
+
+
+
+ maven-dependency-plugin
+
+
+
+ unpack-dependency-notices
+ prepare-package
+
+ unpack-dependencies
+
+
+ true
+ **\/NOTICE,**\/NOTICE.txt
+
+
+
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+ 1.4.0
+
+
+ concat-NOTICE-files
+ prepare-package
+
+ exec
+
+
+ env
+
+ bash
+ -c
+ cat maven-shared-archive-resources-for-assembly/META-INF/NOTICE \
+ `find ${project.build.directory}/dependency -iname NOTICE -or -iname NOTICE.txt` \
+
+
+ ${project.build.directory}/NOTICE.aggregate
+ ${project.build.directory}
+
+
+
+
+
+
+ org.apache.felix
+ maven-bundle-plugin
+ 2.5.3
+ true
+ true
+
+
+ false
yyyy-MM-dd'T'HH:mm
-
- ${maven.build.timestamp}
- 1.6
-
+ ${maven.build.timestamp}
+
+ 3.0.3
+ ${compileSource}1.5.31.21.4
+
+ 3.2.23.12.12.51.1.12.11.6
- r09
- 1.5.5
+ 2.1.2
+ 11.0.2
+ 1.8.85.5.232.16.1.266.1.14
- 1.4
+ 1.81.6.5
- 4.10-HBASE-1
+ 4.11
+ 1.3
+ 1.4.31.2.161.8.52.4.0a
- 1.5.81.0.1
+ thrift0.8.0
- 3.4.3
+ 3.4.50.0.1-SNAPSHOT
+ 2.6.3/usr/etc/hbase
@@ -992,6 +1327,9 @@
org.apache.hadoop.hbase.SmallTestsorg.apache.hadoop.hbase.MediumTests
+ true
+ 900
+ 1.0-beta-3
@@ -1007,6 +1345,11 @@
+
+ com.yammer.metrics
+ metrics-core
+ ${metrics-core.version}
+ com.google.guavaguava
@@ -1032,6 +1375,11 @@
commons-codec${commons-codec.version}
+
+ commons-collections
+ commons-collections
+ ${commons-collections.version}
+ commons-httpclientcommons-httpclient
@@ -1262,7 +1610,9 @@
junitjunit${junit.version}
- test
+ runtime
+
+ trueorg.mockito
@@ -1407,6 +1757,17 @@
Mac_OS_X-${sun.arch.data.model}
+
+ os.windows
+
+
+ Windows
+
+
+
+ cygwin
+
+
@@ -1425,6 +1786,39 @@
+
+ org.apache.maven.plugins
+ maven-enforcer-plugin
+
+
+
+ ${compileSource}
+ HBase has unsupported dependencies.
+ HBase requires that all dependencies be compiled with version ${compileSource} or earlier
+ of the JDK to properly build from source. You appear to be using a newer dependency. You can use
+ either "mvn -version" or "mvn enforcer:display-info" to verify what version is active.
+ Non-release builds can temporarily build with a newer JDK version by setting the
+ 'compileSource' property (eg. mvn -DcompileSource=1.8 clean package).
+
+
+
+
+
+
+ maven-javadoc-plugin
+ 2.6.1
+
+ true
+
+
+
+ prepare-package
+
+ javadoc
+
+
+
+
@@ -1486,7 +1880,8 @@
- 1.0.0
+ 1.0.4
+ 1.4.3
@@ -1553,11 +1948,11 @@
-
+
- security
+ security-test
- 1.0.0
+ 1.0.4${project.artifactId}-${project.version}-security
@@ -1566,28 +1961,6 @@
org.codehaus.mojobuild-helper-maven-plugin
-
- add-source
-
- add-source
-
-
-
-
-
-
-
-
- add-test-source
-
- add-test-source
-
-
-
-
-
-
- add-test-resource
@@ -1612,28 +1985,28 @@
- hadoop-0.22
+ hadoop-1.1hadoop.profile
- 22
+ 1.1
- 0.22.0
+ 1.1.2
+ 1.4.3org.apache.hadoop
- hadoop-common
+ hadoop-core${hadoop.version}true
-
hsqldbhsqldb
@@ -1654,59 +2027,222 @@
orooro
-
- jdiff
- jdiff
-
-
- org.apache.lucene
- lucene-core
- org.apache.hadoop
- hadoop-hdfs
+ hadoop-test${hadoop.version}true
-
-
-
- hsqldb
- hsqldb
-
-
- net.sf.kosmosfs
- kfs
-
-
- org.eclipse.jdt
- core
-
-
- net.java.dev.jets3t
- jets3t
-
-
- oro
- oro
-
-
- jdiff
- jdiff
-
-
- org.apache.lucene
- lucene-core
-
-
+ test
-
- org.apache.hadoop
- hadoop-mapred
- ${hadoop.version}
- true
-
+
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-test-resource
+
+ add-test-resource
+
+
+
+
+ src/test/resources
+
+ hbase-site.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hadoop-1.2
+
+
+ hadoop.profile
+ 1.2
+
+
+
+ 1.2.1
+ 1.4.3
+
+
+
+ org.apache.hadoop
+ hadoop-core
+ ${hadoop.version}
+ true
+
+
+ hsqldb
+ hsqldb
+
+
+ net.sf.kosmosfs
+ kfs
+
+
+ org.eclipse.jdt
+ core
+
+
+ net.java.dev.jets3t
+ jets3t
+
+
+ oro
+ oro
+
+
+
+
+ org.apache.hadoop
+ hadoop-test
+ ${hadoop.version}
+ true
+ test
+
+
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-test-resource
+
+ add-test-resource
+
+
+
+
+ src/test/resources
+
+ hbase-site.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hadoop-0.22
+
+
+ hadoop.profile
+ 22
+
+
+
+ 0.22.0
+ 1.6.1
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.version}
+ true
+
+
+
+ hsqldb
+ hsqldb
+
+
+ net.sf.kosmosfs
+ kfs
+
+
+ org.eclipse.jdt
+ core
+
+
+ net.java.dev.jets3t
+ jets3t
+
+
+ oro
+ oro
+
+
+ jdiff
+ jdiff
+
+
+ org.apache.lucene
+ lucene-core
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${hadoop.version}
+ true
+
+
+
+ hsqldb
+ hsqldb
+
+
+ net.sf.kosmosfs
+ kfs
+
+
+ org.eclipse.jdt
+ core
+
+
+ net.java.dev.jets3t
+ jets3t
+
+
+ oro
+ oro
+
+
+ jdiff
+ jdiff
+
+
+ org.apache.lucene
+ lucene-core
+
+
+
+
+ org.apache.hadoop
+ hadoop-mapred
+ ${hadoop.version}
+ true
+ hsqldb
@@ -1802,7 +2338,8 @@
- 0.23.1-SNAPSHOT
+ 0.23.7
+ 1.6.1
@@ -1870,6 +2407,269 @@
+
+
+ hadoop-0.24
+
+
+ hadoop.profile
+ 24
+
+
+
+ 0.24.0-SNAPSHOT
+ 1.6.1
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-annotations
+ ${hadoop.version}
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ ${hadoop.version}
+ compile
+
+
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-test-resource
+
+ add-test-resource
+
+
+
+
+ src/test/resources
+
+ hbase-site.xml
+
+
+
+
+
+
+
+
+ maven-dependency-plugin
+
+
+ create-mrapp-generated-classpath
+ generate-test-resources
+
+ build-classpath
+
+
+
+ ${project.build.directory}/test-classes/mrapp-generated-classpath
+
+
+
+
+
+
+
+
+
+
+ hadoop-2.0
+
+
+ hadoop.profile
+ 2.0
+
+
+
+ 2.0.0-alpha
+ 1.6.1
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-annotations
+ ${hadoop.version}
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ ${hadoop.version}
+ compile
+
+
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-test-resource
+
+ add-test-resource
+
+
+
+
+ src/test/resources
+
+ hbase-site.xml
+
+
+
+
+
+
+
+
+ maven-dependency-plugin
+
+
+ create-mrapp-generated-classpath
+ generate-test-resources
+
+ build-classpath
+
+
+
+ ${project.build.directory}/test-classes/mrapp-generated-classpath
+
+
+
+
+
+
+
+
+
+
+ hadoop-2.7
+
+
+ hadoop.profile
+ 2.7
+
+
+
+ 2.7.1
+ 1.6.1
+ 2.5.0
+ 2.4
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-annotations
+ ${hadoop.version}
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ ${hadoop.version}
+ compile
+
+
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-test-resource
+
+ add-test-resource
+
+
+
+
+ src/test/resources
+
+ hbase-site.xml
+
+
+
+
+
+
+
+
+ maven-dependency-plugin
+
+
+ create-mrapp-generated-classpath
+ generate-test-resources
+
+ build-classpath
+
+
+
+ ${project.build.directory}/test-classes/mrapp-generated-classpath
+
+
+
+
+
+
+
+
@@ -2019,6 +2819,61 @@
+
+
+
+ clover
+
+ false
+
+ clover
+
+
+
+ ${user.home}/.clover.license
+ 2.6.3
+
+
+
+
+ com.atlassian.maven.plugins
+ maven-clover2-plugin
+ ${clover.version}
+
+ true
+ true
+ 50%
+ true
+ true
+
+ **/generated/**
+
+
+
+
+ clover-setup
+ process-sources
+
+ setup
+
+
+
+ clover
+ site
+
+ clover
+
+
+
+
+
+
+
@@ -2026,7 +2881,7 @@
maven-project-info-reports-plugin
- 2.1.2
+ 2.6
@@ -2043,85 +2898,116 @@
- maven-site-plugin
- 2.0.1
-
- UTF-8
- UTF-8
- src/site/site.vm
-
-
-
+ org.apache.maven.pluginsmaven-javadoc-plugin
- 2.6.1
-
- true
-
+ 2.10.3
+
- default
+ devapi
- javadoc
+ aggregate
+
+ devapidocs
+ Developer API
+ The full HBase API, including private and unstable APIs
+
+ **/generated/*
+ **/protobuf/*
+ **/*.scala
+
+ *.generated.master:*.generated:org.apache.hadoop.hbase.tmpl.common:com.google.protobuf:org.apache.hadoop.hbase.spark
+ true
+ true
+ true
+ true
+ true
+ all
+ true
+
+ -J-Xmx2G
+
+
+
+ org.mockito
+ mockito-all
+ ${mockito-all.version}
+
+
+ org.hamcrest
+ hamcrest-core
+ ${hamcrest.version}
+
+
+ false
+
+
+
+
+
+ userapi
+
+ aggregate
+
+
+ apidocs
+ User API
+ The HBase Application Programmer's API
+
+ org.apache.hadoop.hbase.backup*:org.apache.hadoop.hbase.catalog:org.apache.hadoop.hbase.client.coprocessor:org.apache.hadoop.hbase.client.metrics:org.apache.hadoop.hbase.codec*:org.apache.hadoop.hbase.constraint:org.apache.hadoop.hbase.coprocessor.*:org.apache.hadoop.hbase.executor:org.apache.hadoop.hbase.fs:*.generated.*:org.apache.hadoop.hbase.io.hfile.*:org.apache.hadoop.hbase.mapreduce.hadoopbackport:org.apache.hadoop.hbase.mapreduce.replication:org.apache.hadoop.hbase.master.*:org.apache.hadoop.hbase.metrics*:org.apache.hadoop.hbase.migration:org.apache.hadoop.hbase.monitoring:org.apache.hadoop.hbase.p*:org.apache.hadoop.hbase.regionserver.compactions:org.apache.hadoop.hbase.regionserver.handler:org.apache.hadoop.hbase.regionserver.snapshot:org.apache.hadoop.hbase.replication.*:org.apache.hadoop.hbase.rest.filter:org.apache.hadoop.hbase.rest.model:org.apache.hadoop.hbase.rest.p*:org.apache.hadoop.hbase.security.*:org.apache.hadoop.hbase.thrift*:org.apache.hadoop.hbase.tmpl.*:org.apache.hadoop.hbase.tool:org.apache.hadoop.hbase.trace:org.apache.hadoop.hbase.util.byterange*:org.apache.hadoop.hbase.util.test:org.apache.hadoop.hbase.util.vint:org.apache.hadoop.hbase.zookeeper.lock:org.apache.hadoop.metrics2*
+
+
+ false
+
+
+ org.apache.hbase:hbase-annotations
+
+ ${project.reporting.outputDirectory}/devapidocs
+ Developer API
+ The full HBase API, including private and unstable APIs
+ **/generated/*
+ org.apache.hadoop.hbase.generated.master:org.apache.hadoop.hbase.protobuf.generated:org.apache.hadoop.hbase.tmpl.common
+ true
+ true
+ true
+ true
+ true
+ all
+ true
+
+ -J-Xmx2G
+
+
+
+ org.mockito
+ mockito-all
+ ${mockito-all.version}
+
+
+ org.hamcrest
+ hamcrest-core
+ ${hamcrest.version}
+
+
+ false
+
-
-
-
org.apache.maven.pluginsmaven-jxr-plugin
- 2.1
-
-
org.apache.ratapache-rat-plugin
@@ -2148,4 +3034,14 @@
+
+
+ hbase.apache.org
+ HBase Website at hbase.apache.org
+
+ file:///tmp
+
+
diff --git a/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureClient.java b/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureClient.java
index e85bf42220e1..70fe4b72595d 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureClient.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureClient.java
@@ -21,6 +21,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.security.HBaseSaslRpcClient;
import org.apache.hadoop.hbase.security.HBaseSaslRpcServer.AuthMethod;
import org.apache.hadoop.hbase.security.KerberosInfo;
@@ -40,6 +41,7 @@
import org.apache.hadoop.util.ReflectionUtils;
import javax.net.SocketFactory;
+import javax.security.sasl.SaslException;
import java.io.*;
import java.net.*;
import java.security.PrivilegedExceptionAction;
@@ -71,6 +73,10 @@ public class SecureClient extends HBaseClient {
private static final Log LOG =
LogFactory.getLog("org.apache.hadoop.ipc.SecureClient");
+ public static final String IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY =
+ "hbase.ipc.client.fallback-to-simple-auth-allowed";
+ public static final boolean IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT = false;
+
protected static Map> tokenHandlers =
new HashMap>();
static {
@@ -97,7 +103,7 @@ public SecureConnection(ConnectionId remoteId) throws IOException {
User ticket = remoteId.getTicket();
Class<?> protocol = remoteId.getProtocol();
- this.useSasl = User.isSecurityEnabled();
+ this.useSasl = userProvider.isHBaseSecurityEnabled();
if (useSasl && protocol != null) {
TokenInfo tokenInfo = protocol.getAnnotation(TokenInfo.class);
if (tokenInfo != null) {
@@ -172,7 +178,7 @@ private synchronized boolean shouldAuthenticateOverKrb() throws IOException {
private synchronized boolean setupSaslConnection(final InputStream in2,
final OutputStream out2)
throws IOException {
- saslRpcClient = new HBaseSaslRpcClient(authMethod, token, serverPrincipal);
+ saslRpcClient = new HBaseSaslRpcClient(authMethod, token, serverPrincipal, fallbackAllowed);
return saslRpcClient.saslConnect(in2, out2);
}
@@ -185,6 +191,14 @@ private synchronized boolean setupSaslConnection(final InputStream in2,
* again.
* The other problem is to do with ticket expiry. To handle that,
* a relogin is attempted.
+ *
+ * The retry logic is governed by the {@link #shouldAuthenticateOverKrb}
+ * method. In case when the user doesn't have valid credentials, we don't
+ * need to retry (from cache or ticket). In such cases, it is prudent to
+ * throw a runtime exception when we receive a SaslException from the
+ * underlying authentication implementation, so that there is no retry from
+ * other, higher-level layers (e.g., HCM or HBaseAdmin).
+ *
*/
private synchronized void handleSaslConnectionFailure(
final int currRetries,
@@ -222,8 +236,16 @@ public Object run() throws IOException, InterruptedException {
LOG.warn("Exception encountered while connecting to " +
"the server : " + ex);
}
- if (ex instanceof RemoteException)
+ if (ex instanceof RemoteException) {
throw (RemoteException)ex;
+ }
+ if (ex instanceof SaslException) {
+ String msg = "SASL authentication failed." +
+ " The most likely cause is missing or invalid credentials." +
+ " Consider 'kinit'.";
+ LOG.fatal(msg, ex);
+ throw new RuntimeException(msg, ex);
+ }
throw new IOException(ex);
}
});
@@ -246,7 +268,7 @@ protected synchronized void setupIOstreams()
while (true) {
setupConnection();
InputStream inStream = NetUtils.getInputStream(socket);
- OutputStream outStream = NetUtils.getOutputStream(socket);
+ OutputStream outStream = NetUtils.getOutputStream(socket, pingInterval);
writeRpcHeader(outStream);
if (useSasl) {
final InputStream in2 = inStream;
@@ -255,7 +277,7 @@ protected synchronized void setupIOstreams()
if (authMethod == AuthMethod.KERBEROS) {
UserGroupInformation ugi = ticket.getUGI();
if (ugi != null && ugi.getRealUser() != null) {
- ticket = User.create(ugi.getRealUser());
+ ticket = userProvider.create(ugi.getRealUser());
}
}
boolean continueSasl = false;
@@ -271,8 +293,11 @@ public Boolean run() throws IOException {
if (rand == null) {
rand = new Random();
}
- handleSaslConnectionFailure(numRetries++, MAX_RETRIES, ex, rand,
- ticket);
+ try {
+ handleSaslConnectionFailure(numRetries++, MAX_RETRIES, ex, rand, ticket);
+ } catch (InterruptedException e) {
+ throw new IOException(e);
+ }
continue;
}
if (continueSasl) {
@@ -300,7 +325,14 @@ public Boolean run() throws IOException {
start();
return;
}
- } catch (IOException e) {
+ } catch (Throwable t) {
+ failedServers.addToFailedServers(remoteId.address);
+ IOException e;
+ if (t instanceof IOException) {
+ e = (IOException)t;
+ } else {
+ e = new IOException("Could not set up Secure IO Streams", t);
+ }
markClosed(e);
close();
@@ -346,7 +378,11 @@ protected void receiveResponse() {
if (LOG.isDebugEnabled())
LOG.debug(getName() + " got value #" + id);
- Call call = calls.remove(id);
+ // We first get the call by id and only remove it from the call map after it is processed.
+ // If we removed the call here, the thread waiting on the call could not be notified
+ // if we encounter any exception in the 'try' block. Refer to 'receiveResponse'
+ // in org.apache.hadoop.hbase.ipc.HBaseClient.java
+ Call call = calls.get(id);
int state = in.readInt(); // read call status
if (LOG.isDebugEnabled()) {
@@ -358,15 +394,28 @@ protected void receiveResponse() {
if (LOG.isDebugEnabled()) {
LOG.debug("call #"+id+", response is:\n"+value.toString());
}
- call.setValue(value);
+ // it's possible that this call may have been cleaned up due to an RPC
+ // timeout, so check if it still exists before setting the value.
+ if (call != null) {
+ call.setValue(value);
+ }
} else if (state == Status.ERROR.state) {
- call.setException(new RemoteException(WritableUtils.readString(in),
- WritableUtils.readString(in)));
+ if (call != null) {
+ call.setException(new RemoteException(WritableUtils.readString(in), WritableUtils
+ .readString(in)));
+ }
} else if (state == Status.FATAL.state) {
+ RemoteException exception = new RemoteException(WritableUtils.readString(in),
+ WritableUtils.readString(in));
+ // the call will be removed from the call map, so we must set the exception here to notify
+ // the thread waiting on the call
+ if (call != null) {
+ call.setException(exception);
+ }
// Close the connection
- markClosed(new RemoteException(WritableUtils.readString(in),
- WritableUtils.readString(in)));
+ markClosed(exception);
}
+ calls.remove(id);
} catch (IOException e) {
if (e instanceof SocketTimeoutException && remoteId.rpcTimeout > 0) {
// Clean up open calls but don't treat this as a fatal condition,
@@ -395,9 +444,7 @@ protected synchronized void close() {
// release the resources
// first thing to do;take the connection out of the connection list
synchronized (connections) {
- if (connections.get(remoteId) == this) {
- connections.remove(remoteId);
- }
+ connections.removeValue(remoteId, this);
}
// close the streams and therefore the socket
@@ -430,6 +477,9 @@ protected synchronized void close() {
}
}
+ private final boolean fallbackAllowed;
+ private UserProvider userProvider;
+
/**
* Construct an IPC client whose values are of the given {@link org.apache.hadoop.io.Writable}
* class.
@@ -438,8 +488,15 @@ protected synchronized void close() {
* @param factory socket factory
*/
public SecureClient(Class<? extends Writable> valueClass, Configuration conf,
- SocketFactory factory) {
+ SocketFactory factory, UserProvider provider) {
super(valueClass, conf, factory);
+ this.fallbackAllowed =
+ conf.getBoolean(IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY,
+ IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("fallbackAllowed=" + this.fallbackAllowed);
+ }
+ this.userProvider = provider;
}
/**
@@ -447,42 +504,17 @@ public SecureClient(Class extends Writable> valueClass, Configuration conf,
* @param valueClass value class
* @param conf configuration
*/
- public SecureClient(Class<? extends Writable> valueClass, Configuration conf) {
- this(valueClass, conf, NetUtils.getDefaultSocketFactory(conf));
+ public SecureClient(Class<? extends Writable> valueClass, Configuration conf,
+ UserProvider provider) {
+ this(valueClass, conf, NetUtils.getDefaultSocketFactory(conf), provider);
}
+ /**
+ * Creates a SecureConnection. Can be overridden by a subclass for testing.
+ * @param remoteId - the ConnectionId to use for the connection creation.
+ */
@Override
- protected SecureConnection getConnection(InetSocketAddress addr,
- Class<? extends VersionedProtocol> protocol,
- User ticket,
- int rpcTimeout,
- Call call)
- throws IOException, InterruptedException {
- if (!running.get()) {
- // the client is stopped
- throw new IOException("The client is stopped");
- }
- SecureConnection connection;
- /* we could avoid this allocation for each RPC by having a
- * connectionsId object and with set() method. We need to manage the
- * refs for keys in HashMap properly. For now its ok.
- */
- ConnectionId remoteId = new ConnectionId(addr, protocol, ticket, rpcTimeout);
- do {
- synchronized (connections) {
- connection = (SecureConnection)connections.get(remoteId);
- if (connection == null) {
- connection = new SecureConnection(remoteId);
- connections.put(remoteId, connection);
- }
- }
- } while (!connection.addCall(call));
-
- //we don't invoke the method below inside "synchronized (connections)"
- //block above. The reason for that is if the server happens to be slow,
- //it will take longer to establish a connection and that will slow the
- //entire system down.
- connection.setupIOstreams();
- return connection;
+ protected SecureConnection createConnection(ConnectionId remoteId) throws IOException {
+ return new SecureConnection(remoteId);
}
-}
\ No newline at end of file
+}
diff --git a/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureConnectionHeader.java b/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureConnectionHeader.java
index 506082151997..cfcaf53f0ac8 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureConnectionHeader.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureConnectionHeader.java
@@ -21,9 +21,12 @@
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.security.HBaseSaslRpcServer.AuthMethod;
-import org.apache.hadoop.io.Text;
import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.UserGroupInformation;
/**
@@ -61,14 +64,16 @@ public void readFields(DataInput in) throws IOException {
if (ugiUsernamePresent) {
String username = in.readUTF();
boolean realUserNamePresent = in.readBoolean();
+ Configuration conf = HBaseConfiguration.create();
+ UserProvider provider = UserProvider.instantiate(conf);
if (realUserNamePresent) {
String realUserName = in.readUTF();
UserGroupInformation realUserUgi =
UserGroupInformation.createRemoteUser(realUserName);
- user = User.create(
+ user = provider.create(
UserGroupInformation.createProxyUser(username, realUserUgi));
} else {
- user = User.create(UserGroupInformation.createRemoteUser(username));
+ user = provider.create(UserGroupInformation.createRemoteUser(username));
}
} else {
user = null;
diff --git a/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureRpcEngine.java b/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureRpcEngine.java
index 8219bea79930..5e6e0e727c63 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureRpcEngine.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureRpcEngine.java
@@ -20,10 +20,8 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.client.RetriesExhaustedException;
+import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.io.HbaseObjectWritable;
import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler;
import org.apache.hadoop.hbase.security.HBasePolicyProvider;
@@ -32,20 +30,11 @@
import org.apache.hadoop.hbase.security.token.AuthenticationTokenSecretManager;
import org.apache.hadoop.hbase.util.Objects;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate;
-import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
-import javax.net.SocketFactory;
-import java.io.DataInput;
-import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.*;
-import java.net.ConnectException;
import java.net.InetSocketAddress;
-import java.net.SocketTimeoutException;
-import java.util.HashMap;
-import java.util.Map;
/**
* A loadable RPC engine supporting SASL authentication of connections, using
@@ -63,94 +52,48 @@
*/
public class SecureRpcEngine implements RpcEngine {
// Leave this out in the hadoop ipc package but keep class name. Do this
- // so that we dont' get the logging of this class's invocations by doing our
+ // so that we do not get the logging of this class' invocations by doing our
// blanket enabling DEBUG on the o.a.h.h. package.
protected static final Log LOG =
LogFactory.getLog("org.apache.hadoop.ipc.SecureRpcEngine");
- private SecureRpcEngine() {
- super();
- } // no public ctor
+ private Configuration conf;
+ private SecureClient client;
+ private UserProvider provider;
- /* Cache a client using its socket factory as the hash key */
- static private class ClientCache {
- private Map clients =
- new HashMap();
-
- protected ClientCache() {}
-
- /**
- * Construct & cache an IPC client with the user-provided SocketFactory
- * if no cached client exists.
- *
- * @param conf Configuration
- * @param factory socket factory
- * @return an IPC client
- */
- protected synchronized SecureClient getClient(Configuration conf,
- SocketFactory factory) {
- // Construct & cache client. The configuration is only used for timeout,
- // and Clients have connection pools. So we can either (a) lose some
- // connection pooling and leak sockets, or (b) use the same timeout for all
- // configurations. Since the IPC is usually intended globally, not
- // per-job, we choose (a).
- SecureClient client = clients.get(factory);
- if (client == null) {
- // Make an hbase client instead of hadoop Client.
- client = new SecureClient(HbaseObjectWritable.class, conf, factory);
- clients.put(factory, client);
- } else {
- client.incCount();
- }
- return client;
- }
-
- /**
- * Construct & cache an IPC client with the default SocketFactory
- * if no cached client exists.
- *
- * @param conf Configuration
- * @return an IPC client
- */
- protected synchronized SecureClient getClient(Configuration conf) {
- return getClient(conf, SocketFactory.getDefault());
+ @Override
+ public void setConf(Configuration config) {
+ this.conf = config;
+ this.provider = UserProvider.instantiate(config);
+ if (provider.isHBaseSecurityEnabled()) {
+ HBaseSaslRpcServer.init(conf);
}
-
- /**
- * Stop a RPC client connection
- * A RPC client is closed only when its reference count becomes zero.
- * @param client client to stop
- */
- protected void stopClient(SecureClient client) {
- synchronized (this) {
- client.decCount();
- if (client.isZeroReference()) {
- clients.remove(client.getSocketFactory());
- }
- }
- if (client.isZeroReference()) {
- client.stop();
- }
+ // check for an already created client
+ if (this.client != null) {
+ this.client.stop();
}
+ this.client = new SecureClient(HbaseObjectWritable.class, conf, provider);
}
- protected final static ClientCache CLIENTS = new ClientCache();
+ @Override
+ public Configuration getConf() {
+ return this.conf;
+ }
private static class Invoker implements InvocationHandler {
private Class<? extends VersionedProtocol> protocol;
private InetSocketAddress address;
private User ticket;
private SecureClient client;
- private boolean isClosed = false;
final private int rpcTimeout;
- public Invoker(Class<? extends VersionedProtocol> protocol,
- InetSocketAddress address, User ticket,
- Configuration conf, SocketFactory factory, int rpcTimeout) {
+ public Invoker(SecureClient client,
+        Class<? extends VersionedProtocol> protocol,
+ InetSocketAddress address, User ticket, int rpcTimeout) {
this.protocol = protocol;
this.address = address;
this.ticket = ticket;
- this.client = CLIENTS.getClient(conf, factory);
+ this.client = client;
this.rpcTimeout = rpcTimeout;
}
@@ -162,7 +105,7 @@ public Object invoke(Object proxy, Method method, Object[] args)
startTime = System.currentTimeMillis();
}
HbaseObjectWritable value = (HbaseObjectWritable)
- client.call(new Invocation(method, args), address,
+ client.call(new Invocation(method, protocol, args), address,
protocol, ticket, rpcTimeout);
if (logDebug) {
long callTime = System.currentTimeMillis() - startTime;
@@ -170,14 +113,6 @@ public Object invoke(Object proxy, Method method, Object[] args)
}
return value.get();
}
-
- /* close the IPC client that's responsible for this invoker's RPCs */
- synchronized protected void close() {
- if (!isClosed) {
- isClosed = true;
- CLIENTS.stopClient(client);
- }
- }
}
/**
@@ -187,24 +122,30 @@ synchronized protected void close() {
* @param protocol interface
* @param clientVersion version we are expecting
* @param addr remote address
- * @param ticket ticket
* @param conf configuration
- * @param factory socket factory
* @return proxy
* @throws java.io.IOException e
*/
- public VersionedProtocol getProxy(
-      Class<? extends VersionedProtocol> protocol, long clientVersion,
- InetSocketAddress addr, User ticket,
- Configuration conf, SocketFactory factory, int rpcTimeout)
+ @Override
+  public <T extends VersionedProtocol> T getProxy(
+      Class<T> protocol, long clientVersion,
+ InetSocketAddress addr,
+ Configuration conf, int rpcTimeout)
throws IOException {
- if (User.isSecurityEnabled()) {
- HBaseSaslRpcServer.init(conf);
+ if (this.client == null) {
+ throw new IOException("Client must be initialized by calling setConf(Configuration)");
}
- VersionedProtocol proxy =
- (VersionedProtocol) Proxy.newProxyInstance(
+
+ T proxy =
+ (T) Proxy.newProxyInstance(
protocol.getClassLoader(), new Class[] { protocol },
- new Invoker(protocol, addr, ticket, conf, factory, rpcTimeout));
+ new Invoker(this.client, protocol, addr, provider.getCurrent(),
+ HBaseRPC.getRpcTimeout(rpcTimeout)));
+ /*
+ * TODO: checking protocol version only needs to be done once when we setup a new
+ * SecureClient.Connection. Doing it every time we retrieve a proxy instance is resulting
+ * in unnecessary RPC traffic.
+ */
long serverVersion = proxy.getProtocolVersion(protocol.getName(),
clientVersion);
if (serverVersion != clientVersion) {
@@ -214,50 +155,48 @@ public VersionedProtocol getProxy(
return proxy;
}
- /**
- * Stop this proxy and release its invoker's resource
- * @param proxy the proxy to be stopped
- */
- public void stopProxy(VersionedProtocol proxy) {
- if (proxy!=null) {
- ((Invoker)Proxy.getInvocationHandler(proxy)).close();
- }
- }
-
-
/** Expert: Make multiple, parallel calls to a set of servers. */
+ @Override
public Object[] call(Method method, Object[][] params,
InetSocketAddress[] addrs,
      Class<? extends VersionedProtocol> protocol,
User ticket, Configuration conf)
throws IOException, InterruptedException {
+ if (this.client == null) {
+ throw new IOException("Client must be initialized by calling setConf(Configuration)");
+ }
Invocation[] invocations = new Invocation[params.length];
- for (int i = 0; i < params.length; i++)
- invocations[i] = new Invocation(method, params[i]);
- SecureClient client = CLIENTS.getClient(conf);
- try {
- Writable[] wrappedValues =
- client.call(invocations, addrs, protocol, ticket);
-
- if (method.getReturnType() == Void.TYPE) {
- return null;
- }
+ for (int i = 0; i < params.length; i++) {
+ invocations[i] = new Invocation(method, protocol, params[i]);
+ }
+
+ Writable[] wrappedValues =
+ client.call(invocations, addrs, protocol, ticket);
+
+ if (method.getReturnType() == Void.TYPE) {
+ return null;
+ }
- Object[] values =
- (Object[])Array.newInstance(method.getReturnType(), wrappedValues.length);
- for (int i = 0; i < values.length; i++)
- if (wrappedValues[i] != null)
- values[i] = ((HbaseObjectWritable)wrappedValues[i]).get();
+ Object[] values =
+ (Object[])Array.newInstance(method.getReturnType(), wrappedValues.length);
+ for (int i = 0; i < values.length; i++)
+ if (wrappedValues[i] != null)
+ values[i] = ((HbaseObjectWritable)wrappedValues[i]).get();
+
+ return values;
+ }
- return values;
- } finally {
- CLIENTS.stopClient(client);
+ @Override
+ public void close() {
+ if (this.client != null) {
+ this.client.stop();
}
}
/** Construct a server for a protocol implementation instance listening on a
* port and address, with a secret manager. */
+ @Override
  public Server getServer(Class<? extends VersionedProtocol> protocol,
                          final Object instance,
                          Class<?>[] ifaces,
diff --git a/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureServer.java b/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureServer.java
index 0766f5d23ebb..c5fe8c1640c7 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureServer.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/ipc/SecureServer.java
@@ -21,6 +21,8 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.io.HbaseObjectWritable;
import org.apache.hadoop.hbase.io.WritableWithSize;
import org.apache.hadoop.hbase.security.HBaseSaslRpcServer;
@@ -47,6 +49,8 @@
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
+import com.google.common.collect.ImmutableSet;
+
import javax.security.sasl.Sasl;
import javax.security.sasl.SaslException;
import javax.security.sasl.SaslServer;
@@ -83,20 +87,23 @@ public abstract class SecureServer extends HBaseServer {
// 3 : Introduce the protocol into the RPC connection header
// 4 : Introduced SASL security layer
public static final byte CURRENT_VERSION = 4;
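+  // RPC versions that predate the SASL security layer; connections announcing one of
+  // these get the more specific "insecure client" warning in readAndProcess() below.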
+  public static final Set<Integer> INSECURE_VERSIONS = ImmutableSet.of(3);
+
+ public static final Log LOG = LogFactory.getLog(SecureServer.class);
+ private static final Log AUDITLOG = LogFactory.getLog("SecurityLogger." +
+ SecureServer.class.getName());
- public static final Log LOG = LogFactory.getLog("org.apache.hadoop.ipc.SecureServer");
- private static final Log AUDITLOG =
- LogFactory.getLog("SecurityLogger.org.apache.hadoop.ipc.SecureServer");
private static final String AUTH_FAILED_FOR = "Auth failed for ";
private static final String AUTH_SUCCESSFUL_FOR = "Auth successful for ";
  protected SecretManager<TokenIdentifier> secretManager;
protected ServiceAuthorizationManager authManager;
+ private UserProvider userProvider;
protected class SecureCall extends HBaseServer.Call {
public SecureCall(int id, Writable param, Connection connection,
- Responder responder) {
- super(id, param, connection, responder);
+ Responder responder, long size) {
+ super(id, param, connection, responder, size);
}
@Override
@@ -168,9 +175,10 @@ private void wrapWithSasl(ByteBufferOutputStream response)
token = ((SecureConnection)connection).saslServer.wrap(buf.array(),
buf.arrayOffset(), buf.remaining());
}
- if (LOG.isDebugEnabled())
- LOG.debug("Adding saslServer wrapped token of size " + token.length
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Adding saslServer wrapped token of size " + token.length
+ " as call response.");
+ }
buf.clear();
DataOutputStream saslOut = new DataOutputStream(response);
saslOut.writeInt(token.length);
@@ -205,7 +213,7 @@ public class SecureConnection extends HBaseServer.Connection {
private final int AUTHORIZATION_FAILED_CALLID = -1;
// Fake 'call' for SASL context setup
private static final int SASL_CALLID = -33;
- private final SecureCall saslCall = new SecureCall(SASL_CALLID, null, this, null);
+ private final SecureCall saslCall = new SecureCall(SASL_CALLID, null, this, null, 0);
private boolean useWrap = false;
@@ -246,9 +254,9 @@ private User getAuthorizedUgi(String authorizedId)
"Can't retrieve username from tokenIdentifier.");
}
ugi.addTokenIdentifier(tokenId);
- return User.create(ugi);
+ return userProvider.create(ugi);
} else {
- return User.create(UserGroupInformation.createRemoteUser(authorizedId));
+ return userProvider.create(UserGroupInformation.createRemoteUser(authorizedId));
}
}
@@ -273,8 +281,9 @@ HBaseSaslRpcServer.SASL_PROPS, new SaslDigestCallbackHandler(
UserGroupInformation current = UserGroupInformation
.getCurrentUser();
String fullName = current.getUserName();
- if (LOG.isDebugEnabled())
- LOG.debug("Kerberos principal name is " + fullName);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Kerberos principal name is " + fullName);
+ }
final String names[] = HBaseSaslRpcServer.splitKerberosName(fullName);
if (names.length != 3) {
throw new AccessControlException(
@@ -295,13 +304,15 @@ public Object run() throws SaslException {
throw new AccessControlException(
"Unable to find SASL server implementation for "
+ authMethod.getMechanismName());
- if (LOG.isDebugEnabled())
- LOG.debug("Created SASL server with mechanism = "
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Created SASL server with mechanism = "
+ authMethod.getMechanismName());
+ }
}
- if (LOG.isDebugEnabled())
- LOG.debug("Have read input token of size " + saslToken.length
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Have read input token of size " + saslToken.length
+ " for processing by saslServer.evaluateResponse()");
+ }
replyToken = saslServer.evaluateResponse(saslToken);
} catch (IOException e) {
IOException sendToClient = e;
@@ -322,28 +333,33 @@ public Object run() throws SaslException {
throw e;
}
if (replyToken != null) {
- if (LOG.isDebugEnabled())
- LOG.debug("Will send token of size " + replyToken.length
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Will send token of size " + replyToken.length
+ " from saslServer.");
+ }
doSaslReply(SaslStatus.SUCCESS, new BytesWritable(replyToken), null,
null);
}
if (saslServer.isComplete()) {
- LOG.debug("SASL server context established. Negotiated QoP is "
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("SASL server context established. Negotiated QoP is "
+ saslServer.getNegotiatedProperty(Sasl.QOP));
+ }
String qop = (String) saslServer.getNegotiatedProperty(Sasl.QOP);
useWrap = qop != null && !"auth".equalsIgnoreCase(qop);
ticket = getAuthorizedUgi(saslServer.getAuthorizationID());
- LOG.debug("SASL server successfully authenticated client: " + ticket);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("SASL server successfully authenticated client: " + ticket);
+ }
rpcMetrics.authenticationSuccesses.inc();
- AUDITLOG.trace(AUTH_SUCCESSFUL_FOR + ticket);
+ AUDITLOG.info(AUTH_SUCCESSFUL_FOR + ticket);
saslContextEstablished = true;
}
} else {
- if (LOG.isDebugEnabled())
- LOG.debug("Have read input token of size " + saslToken.length
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Have read input token of size " + saslToken.length
+ " for processing by saslServer.unwrap()");
-
+ }
if (!useWrap) {
processOneRpc(saslToken);
} else {
@@ -400,10 +416,17 @@ public int readAndProcess() throws IOException, InterruptedException {
dataLengthBuffer.flip();
if (!HEADER.equals(dataLengthBuffer) || version != CURRENT_VERSION) {
//Warning is ok since this is not supposed to happen.
- LOG.warn("Incorrect header or version mismatch from " +
- hostAddress + ":" + remotePort +
- " got version " + version +
- " expected version " + CURRENT_VERSION);
+ if (INSECURE_VERSIONS.contains(version)) {
+ LOG.warn("An insecure client (version '" + version + "') is attempting to connect " +
+ " to this version '" + CURRENT_VERSION + "' secure server from " +
+ hostAddress + ":" + remotePort);
+ } else {
+ LOG.warn("Incorrect header or version mismatch from " +
+ hostAddress + ":" + remotePort +
+ " got version " + version +
+ " expected version " + CURRENT_VERSION);
+ }
+
return -1;
}
dataLengthBuffer.clear();
@@ -414,7 +437,7 @@ public int readAndProcess() throws IOException, InterruptedException {
AccessControlException ae = new AccessControlException(
"Authentication is required");
SecureCall failedCall = new SecureCall(AUTHORIZATION_FAILED_CALLID, null, this,
- null);
+ null, 0);
failedCall.setResponse(null, Status.FATAL, ae.getClass().getName(),
ae.getMessage());
responder.doRespond(failedCall);
@@ -519,7 +542,8 @@ private void processHeader(byte[] buf) throws IOException {
// for simple auth or kerberos auth
// The user is the real user. Now we create a proxy user
UserGroupInformation realUser = ticket.getUGI();
- ticket = User.create(
+ ticket =
+ userProvider.create(
UserGroupInformation.createProxyUser(protocolUser.getName(),
realUser));
// Now the user is a proxy user, set Authentication method Proxy.
@@ -547,8 +571,9 @@ private void processUnwrappedData(byte[] inBuf) throws IOException,
int unwrappedDataLength = unwrappedDataLengthBuffer.getInt();
if (unwrappedDataLength == HBaseClient.PING_CALL_ID) {
- if (LOG.isDebugEnabled())
- LOG.debug("Received ping message");
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Received ping message");
+ }
unwrappedDataLengthBuffer.clear();
continue; // ping message
}
@@ -587,20 +612,39 @@ protected void processData(byte[] buf) throws IOException, InterruptedException
DataInputStream dis =
new DataInputStream(new ByteArrayInputStream(buf));
int id = dis.readInt(); // try to read an id
+ long callSize = buf.length;
- if (LOG.isDebugEnabled()) {
- LOG.debug(" got #" + id);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(" got #" + id);
+ }
+
+ // Enforcing the call queue size, this triggers a retry in the client
+ if ((callSize + callQueueSize.get()) > maxQueueSize) {
+ final SecureCall callTooBig =
+ new SecureCall(id, null, this, responder, callSize);
+ ByteArrayOutputStream responseBuffer = new ByteArrayOutputStream();
+ setupResponse(responseBuffer, callTooBig, Status.FATAL, null,
+ IOException.class.getName(),
+ "Call queue is full, is ipc.server.max.callqueue.size too small?");
+ responder.doRespond(callTooBig);
+ return;
}
Writable param = ReflectionUtils.newInstance(paramClass, conf); // read param
param.readFields(dis);
- SecureCall call = new SecureCall(id, param, this, responder);
+ SecureCall call = new SecureCall(id, param, this, responder, callSize);
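+      // Track the size of the queued call so the queue-size cap checked above
+      // reflects what is actually sitting in the call queues.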
+ callQueueSize.add(callSize);
if (priorityCallQueue != null && getQosLevel(param) > highPriorityLevel) {
priorityCallQueue.put(call);
+ updateCallQueueLenMetrics(priorityCallQueue);
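+      // Replication RPCs go to their own queue so they are handled separately
+      // from regular client calls.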
+ } else if (replicationQueue != null && getQosLevel(param) == HConstants.REPLICATION_QOS) {
+ replicationQueue.put(call);
+ updateCallQueueLenMetrics(replicationQueue);
} else {
callQueue.put(call); // queue the call; maybe blocked here
+ updateCallQueueLenMetrics(callQueue);
}
}
@@ -620,10 +664,12 @@ private boolean authorizeConnection() throws IOException {
}
rpcMetrics.authorizationSuccesses.inc();
} catch (AuthorizationException ae) {
- LOG.debug("Connection authorization failed: "+ae.getMessage(), ae);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Connection authorization failed: "+ae.getMessage(), ae);
+ }
rpcMetrics.authorizationFailures.inc();
SecureCall failedCall = new SecureCall(AUTHORIZATION_FAILED_CALLID, null, this,
- null);
+ null, 0);
failedCall.setResponse(null, Status.FATAL, ae.getClass().getName(),
ae.getMessage());
responder.doRespond(failedCall);
@@ -661,8 +707,8 @@ protected SecureServer(String bindAddress, int port,
conf, serverName, highPriorityLevel);
this.authorize =
conf.getBoolean(HADOOP_SECURITY_AUTHORIZATION, false);
- this.isSecurityEnabled = UserGroupInformation.isSecurityEnabled();
- LOG.debug("security enabled="+isSecurityEnabled);
+ this.userProvider = UserProvider.instantiate(this.conf);
+ this.isSecurityEnabled = userProvider.isHBaseSecurityEnabled();
if (isSecurityEnabled) {
HBaseSaslRpcServer.init(conf);
@@ -725,4 +771,4 @@ public void authorize(User user,
protocol, getConf(), addr);
}
}
-}
\ No newline at end of file
+}
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/HBasePolicyProvider.java b/security/src/main/java/org/apache/hadoop/hbase/security/HBasePolicyProvider.java
index 0c4b4cbbaee0..cf1d3f1bf27a 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/security/HBasePolicyProvider.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/HBasePolicyProvider.java
@@ -44,7 +44,7 @@ public Service[] getServices() {
public static void init(Configuration conf,
ServiceAuthorizationManager authManager) {
// set service-level authorization security policy
- conf.set("hadoop.policy.file", "hbase-policy.xml");
+ System.setProperty("hadoop.policy.file", "hbase-policy.xml");
if (conf.getBoolean(
ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
authManager.refresh(conf, new HBasePolicyProvider());
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/HBaseSaslRpcClient.java b/security/src/main/java/org/apache/hadoop/hbase/security/HBaseSaslRpcClient.java
index 809097305b45..c1eb055b1907 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/security/HBaseSaslRpcClient.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/HBaseSaslRpcClient.java
@@ -56,6 +56,7 @@ public class HBaseSaslRpcClient {
public static final Log LOG = LogFactory.getLog(HBaseSaslRpcClient.class);
private final SaslClient saslClient;
+ private final boolean fallbackAllowed;
/**
* Create a HBaseSaslRpcClient for an authentication method
@@ -66,8 +67,9 @@ public class HBaseSaslRpcClient {
* token to use if needed by the authentication method
*/
public HBaseSaslRpcClient(AuthMethod method,
-      Token<? extends TokenIdentifier> token, String serverPrincipal)
-      throws IOException {
+      Token<? extends TokenIdentifier> token, String serverPrincipal,
+ boolean fallbackAllowed) throws IOException {
+ this.fallbackAllowed = fallbackAllowed;
switch (method) {
case DIGEST:
if (LOG.isDebugEnabled())
@@ -148,8 +150,14 @@ public boolean saslConnect(InputStream inS, OutputStream outS)
readStatus(inStream);
int len = inStream.readInt();
if (len == HBaseSaslRpcServer.SWITCH_TO_SIMPLE_AUTH) {
- if (LOG.isDebugEnabled())
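+      // Refuse the server's request to downgrade to SIMPLE auth unless fallback
+      // was explicitly allowed when this client was constructed.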
+ if (!fallbackAllowed) {
+ throw new IOException("Server asks us to fall back to SIMPLE auth,"
+ + " but this client is configured to only allow secure"
+ + " connections.");
+ }
+ if (LOG.isDebugEnabled()) {
LOG.debug("Server asks us to fall back to simple auth.");
+ }
saslClient.dispose();
return false;
}
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessControlLists.java b/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessControlLists.java
index fa001895b3ea..5b4b53d32ff9 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessControlLists.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessControlLists.java
@@ -41,6 +41,9 @@
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.Text;
@@ -79,6 +82,7 @@ public class AccessControlLists {
/** Internal storage table for access control lists */
public static final String ACL_TABLE_NAME_STR = "_acl_";
public static final byte[] ACL_TABLE_NAME = Bytes.toBytes(ACL_TABLE_NAME_STR);
+ public static final byte[] ACL_GLOBAL_NAME = ACL_TABLE_NAME;
/** Column family used to store ACL grants */
public static final String ACL_LIST_FAMILY_STR = "l";
public static final byte[] ACL_LIST_FAMILY = Bytes.toBytes(ACL_LIST_FAMILY_STR);
@@ -117,32 +121,22 @@ static void init(MasterServices master) throws IOException {
}
/**
- * Stores a new table permission grant in the access control lists table.
+ * Stores a new user permission grant in the access control lists table.
* @param conf the configuration
- * @param tableName the table to which access is being granted
- * @param username the user or group being granted the permission
- * @param perm the details of the permission being granted
+ * @param userPerm the details of the permission to be granted
* @throws IOException in the case of an error accessing the metadata table
*/
- static void addTablePermission(Configuration conf,
- byte[] tableName, String username, TablePermission perm)
- throws IOException {
+ static void addUserPermission(Configuration conf, UserPermission userPerm)
+ throws IOException {
+ Permission.Action[] actions = userPerm.getActions();
- Put p = new Put(tableName);
- byte[] key = Bytes.toBytes(username);
- if (perm.getFamily() != null && perm.getFamily().length > 0) {
- key = Bytes.add(key,
- Bytes.add(new byte[]{ACL_KEY_DELIMITER}, perm.getFamily()));
- if (perm.getQualifier() != null && perm.getQualifier().length > 0) {
- key = Bytes.add(key,
- Bytes.add(new byte[]{ACL_KEY_DELIMITER}, perm.getQualifier()));
- }
- }
+ Put p = new Put(userPerm.isGlobal() ? ACL_GLOBAL_NAME : userPerm.getTable());
+ byte[] key = userPermissionKey(userPerm);
- TablePermission.Action[] actions = perm.getActions();
if ((actions == null) || (actions.length == 0)) {
- LOG.warn("No actions associated with user '"+username+"'");
- return;
+ String msg = "No actions associated with user '" + Bytes.toString(userPerm.getUser()) + "'";
+ LOG.warn(msg);
+ throw new IOException(msg);
}
byte[] value = new byte[actions.length];
@@ -152,7 +146,7 @@ static void addTablePermission(Configuration conf,
p.add(ACL_LIST_FAMILY, key, value);
if (LOG.isDebugEnabled()) {
LOG.debug("Writing permission for table "+
- Bytes.toString(tableName)+" "+
+ Bytes.toString(userPerm.getTable())+" "+
Bytes.toString(key)+": "+Bytes.toStringBinary(value)
);
}
@@ -175,34 +169,17 @@ static void addTablePermission(Configuration conf,
* column qualifier "info:colA") will have no effect.
*
* @param conf the configuration
- * @param tableName the table of the current permission grant
- * @param userName the user or group currently granted the permission
- * @param perm the details of the permission to be revoked
+ * @param userPerm the details of the permission to be revoked
* @throws IOException if there is an error accessing the metadata table
*/
- static void removeTablePermission(Configuration conf,
- byte[] tableName, String userName, TablePermission perm)
- throws IOException {
+ static void removeUserPermission(Configuration conf, UserPermission userPerm)
+ throws IOException {
+
+ Delete d = new Delete(userPerm.isGlobal() ? ACL_GLOBAL_NAME : userPerm.getTable());
+ byte[] key = userPermissionKey(userPerm);
- Delete d = new Delete(tableName);
- byte[] key = null;
- if (perm.getFamily() != null && perm.getFamily().length > 0) {
- key = Bytes.toBytes(userName + ACL_KEY_DELIMITER +
- Bytes.toString(perm.getFamily()));
- if (perm.getQualifier() != null && perm.getQualifier().length > 0) {
- key = Bytes.toBytes(userName + ACL_KEY_DELIMITER +
- Bytes.toString(perm.getFamily()) + ACL_KEY_DELIMITER +
- Bytes.toString(perm.getQualifier()));
- } else {
- key = Bytes.toBytes(userName + ACL_KEY_DELIMITER +
- Bytes.toString(perm.getFamily()));
- }
- } else {
- key = Bytes.toBytes(userName);
- }
if (LOG.isDebugEnabled()) {
- LOG.debug("Removing permission for user '" + userName+ "': "+
- perm.toString());
+ LOG.debug("Removing permission "+ userPerm.toString());
}
d.deleteColumns(ACL_LIST_FAMILY, key);
HTable acls = null;
@@ -214,6 +191,95 @@ static void removeTablePermission(Configuration conf,
}
}
+ /**
+ * Remove specified table from the _acl_ table.
+ */
+ static void removeTablePermissions(Configuration conf, byte[] tableName)
+ throws IOException{
+ Delete d = new Delete(tableName);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Removing permissions of removed table "+ Bytes.toString(tableName));
+ }
+
+ HTable acls = null;
+ try {
+ acls = new HTable(conf, ACL_TABLE_NAME);
+ acls.delete(d);
+ } finally {
+ if (acls != null) acls.close();
+ }
+ }
+
+ /**
+ * Remove specified table column from the _acl_ table.
+ */
+ static void removeTablePermissions(Configuration conf, byte[] tableName, byte[] column)
+ throws IOException{
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Removing permissions of removed column " + Bytes.toString(column) +
+ " from table "+ Bytes.toString(tableName));
+ }
+
+ HTable acls = null;
+ try {
+ acls = new HTable(conf, ACL_TABLE_NAME);
+
+ Scan scan = new Scan();
+ scan.addFamily(ACL_LIST_FAMILY);
+
+ String columnName = Bytes.toString(column);
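+      // ACL keys have the form "user", "user,family" or "user,family,qualifier"; the filter
+      // matches any key whose family component equals the removed column, whether it
+      // appears in the middle of the key or at its end.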
+ scan.setFilter(new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator(
+ String.format("(%s%s%s)|(%s%s)$",
+ ACL_KEY_DELIMITER, columnName, ACL_KEY_DELIMITER,
+ ACL_KEY_DELIMITER, columnName))));
+
+      Set<byte[]> qualifierSet = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
+ ResultScanner scanner = acls.getScanner(scan);
+ try {
+ for (Result res : scanner) {
+ for (byte[] q : res.getFamilyMap(ACL_LIST_FAMILY).navigableKeySet()) {
+ qualifierSet.add(q);
+ }
+ }
+ } finally {
+ scanner.close();
+ }
+
+ if (qualifierSet.size() > 0) {
+ Delete d = new Delete(tableName);
+ for (byte[] qualifier : qualifierSet) {
+ d.deleteColumns(ACL_LIST_FAMILY, qualifier);
+ }
+ acls.delete(d);
+ }
+ } finally {
+ if (acls != null) acls.close();
+ }
+ }
+
+ /**
+ * Build qualifier key from user permission:
+ * username
+ * username,family
+ * username,family,qualifier
+ */
+ static byte[] userPermissionKey(UserPermission userPerm) {
+ byte[] qualifier = userPerm.getQualifier();
+ byte[] family = userPerm.getFamily();
+ byte[] key = userPerm.getUser();
+
+ if (family != null && family.length > 0) {
+ key = Bytes.add(key, Bytes.add(new byte[]{ACL_KEY_DELIMITER}, family));
+ if (qualifier != null && qualifier.length > 0) {
+ key = Bytes.add(key, Bytes.add(new byte[]{ACL_KEY_DELIMITER}, qualifier));
+ }
+ }
+
+ return key;
+ }
+
/**
* Returns {@code true} if the given region is part of the {@code _acl_}
* metadata table.
@@ -222,6 +288,13 @@ static boolean isAclRegion(HRegion region) {
return Bytes.equals(ACL_TABLE_NAME, region.getTableDesc().getName());
}
+ /**
+ * Returns {@code true} if the given table is {@code _acl_} metadata table.
+ */
+ static boolean isAclTable(HTableDescriptor desc) {
+ return Bytes.equals(ACL_TABLE_NAME, desc.getName());
+ }
+
/**
* Loads all of the permission grants stored in a region of the {@code _acl_}
* table.
@@ -325,20 +398,12 @@ static Map<byte[], ListMultimap<String,TablePermission>> loadAll(
* used for storage.
*
*/
-  static ListMultimap<String,TablePermission> getTablePermissions(
- Configuration conf, byte[] tableName)
- throws IOException {
- /* TODO: -ROOT- and .META. cannot easily be handled because they must be
- * online before _acl_ table. Can anything be done here?
- */
- if (Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME) ||
- Bytes.equals(tableName, HConstants.META_TABLE_NAME) ||
- Bytes.equals(tableName, AccessControlLists.ACL_TABLE_NAME)) {
- return ArrayListMultimap.create(0,0);
- }
+  static ListMultimap<String,TablePermission> getTablePermissions(Configuration conf,
+ byte[] tableName) throws IOException {
+ if (tableName == null) tableName = ACL_TABLE_NAME;
// for normal user tables, we just read the table row from _acl_
-    ListMultimap<String,TablePermission> perms = ArrayListMultimap.create();
+    ListMultimap<String,TablePermission> perms = ArrayListMultimap.create();
HTable acls = null;
try {
acls = new HTable(conf, ACL_TABLE_NAME);
@@ -348,8 +413,8 @@ static ListMultimap<String,TablePermission> getTablePermissions(
if (!row.isEmpty()) {
perms = parseTablePermissions(tableName, row);
} else {
- LOG.info("No permissions found in "+ACL_TABLE_NAME_STR+
- " for table "+Bytes.toString(tableName));
+ LOG.info("No permissions found in " + ACL_TABLE_NAME_STR + " for table "
+ + Bytes.toString(tableName));
}
} finally {
if (acls != null) acls.close();
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java b/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java
index 6108de898d3a..97b08498fa30 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java
@@ -1,61 +1,84 @@
/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
package org.apache.hadoop.hbase.security.access;
import java.io.IOException;
+import java.net.InetAddress;
+import java.security.PrivilegedExceptionAction;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableNotDisabledException;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.CoprocessorException;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.MasterObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.RegionServerObserver;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.WritableByteArrayComparable;
import org.apache.hadoop.hbase.ipc.HBaseRPC;
import org.apache.hadoop.hbase.ipc.ProtocolSignature;
import org.apache.hadoop.hbase.ipc.RequestContext;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
+import org.apache.hadoop.hbase.regionserver.Store;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.access.Permission.Action;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.hadoop.hbase.util.Pair;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
@@ -95,7 +118,7 @@
*
*/
public class AccessController extends BaseRegionObserver
- implements MasterObserver, AccessControllerProtocol {
+ implements MasterObserver, RegionServerObserver, AccessControllerProtocol {
/**
* Represents the result of an authorization check for logging and error
* reporting.
@@ -106,12 +129,14 @@ private static class AuthResult {
private final byte[] family;
private final byte[] qualifier;
private final Permission.Action action;
+ private final String request;
private final String reason;
private final User user;
- public AuthResult(boolean allowed, String reason, User user,
+ public AuthResult(boolean allowed, String request, String reason, User user,
Permission.Action action, byte[] table, byte[] family, byte[] qualifier) {
this.allowed = allowed;
+ this.request = request;
this.reason = reason;
this.user = user;
this.table = table;
@@ -126,6 +151,8 @@ public AuthResult(boolean allowed, String reason, User user,
public String getReason() { return reason; }
+ public String getRequest() { return request; }
+
public String toContextString() {
return "(user=" + (user != null ? user.getName() : "UNKNOWN") + ", " +
"scope=" + (table == null ? "GLOBAL" : Bytes.toString(table)) + ", " +
@@ -139,19 +166,23 @@ public String toString() {
.append(toContextString()).toString();
}
- public static AuthResult allow(String reason, User user,
- Permission.Action action, byte[] table) {
- return new AuthResult(true, reason, user, action, table, null, null);
+ public static AuthResult allow(String request, String reason, User user, Permission.Action action,
+ byte[] table, byte[] family, byte[] qualifier) {
+ return new AuthResult(true, request, reason, user, action, table, family, qualifier);
+ }
+
+ public static AuthResult allow(String request, String reason, User user, Permission.Action action, byte[] table) {
+ return new AuthResult(true, request, reason, user, action, table, null, null);
}
- public static AuthResult deny(String reason, User user,
+ public static AuthResult deny(String request, String reason, User user,
Permission.Action action, byte[] table) {
- return new AuthResult(false, reason, user, action, table, null, null);
+ return new AuthResult(false, request, reason, user, action, table, null, null);
}
- public static AuthResult deny(String reason, User user,
+ public static AuthResult deny(String request, String reason, User user,
Permission.Action action, byte[] table, byte[] family, byte[] qualifier) {
- return new AuthResult(false, reason, user, action, table, family, qualifier);
+ return new AuthResult(false, request, reason, user, action, table, family, qualifier);
}
}
@@ -163,7 +194,7 @@ public static AuthResult deny(String reason, User user,
/**
* Version number for AccessControllerProtocol
*/
- private static final long PROTOCOL_VERSION = 2L;
+ private static final long PROTOCOL_VERSION = 1L;
TableAuthManager authManager = null;
@@ -178,9 +209,10 @@ public static AuthResult deny(String reason, User user,
  private Map<InternalScanner,String> scannerOwners =
new MapMaker().weakKeys().makeMap();
+ private UserProvider userProvider;
+
void initialize(RegionCoprocessorEnvironment e) throws IOException {
final HRegion region = e.getRegion();
-
    Map<byte[], ListMultimap<String,TablePermission>> tables =
AccessControlLists.loadAll(region);
// For each table, write out the table's permissions to the respective
@@ -188,12 +220,10 @@ void initialize(RegionCoprocessorEnvironment e) throws IOException {
    for (Map.Entry<byte[], ListMultimap<String,TablePermission>> t:
tables.entrySet()) {
byte[] table = t.getKey();
- String tableName = Bytes.toString(table);
      ListMultimap<String,TablePermission> perms = t.getValue();
byte[] serialized = AccessControlLists.writePermissionsAsBytes(perms,
- e.getRegion().getConf());
- this.authManager.getZKPermissionWatcher().writeToZookeeper(tableName,
- serialized);
+ regionEnv.getConfiguration());
+ this.authManager.getZKPermissionWatcher().writeToZookeeper(table, serialized);
}
}
@@ -204,31 +234,28 @@ void initialize(RegionCoprocessorEnvironment e) throws IOException {
*/
void updateACL(RegionCoprocessorEnvironment e,
      final Map<byte[], List<KeyValue>> familyMap) {
-    Set<String> tableSet = new HashSet<String>();
+    Set<byte[]> tableSet = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
    for (Map.Entry<byte[], List<KeyValue>> f : familyMap.entrySet()) {
      List<KeyValue> kvs = f.getValue();
for (KeyValue kv: kvs) {
- if (Bytes.compareTo(kv.getBuffer(), kv.getFamilyOffset(),
+ if (Bytes.equals(kv.getBuffer(), kv.getFamilyOffset(),
kv.getFamilyLength(), AccessControlLists.ACL_LIST_FAMILY, 0,
- AccessControlLists.ACL_LIST_FAMILY.length) == 0) {
- String tableName = Bytes.toString(kv.getRow());
- tableSet.add(tableName);
+ AccessControlLists.ACL_LIST_FAMILY.length)) {
+ tableSet.add(kv.getRow());
}
}
}
- for (String tableName: tableSet) {
+ ZKPermissionWatcher zkw = this.authManager.getZKPermissionWatcher();
+ Configuration conf = regionEnv.getConfiguration();
+ for (byte[] tableName: tableSet) {
try {
        ListMultimap<String,TablePermission> perms =
- AccessControlLists.getTablePermissions(regionEnv.getConfiguration(),
- Bytes.toBytes(tableName));
- byte[] serialized = AccessControlLists.writePermissionsAsBytes(
- perms, e.getRegion().getConf());
- this.authManager.getZKPermissionWatcher().writeToZookeeper(tableName,
- serialized);
+ AccessControlLists.getTablePermissions(conf, tableName);
+ byte[] serialized = AccessControlLists.writePermissionsAsBytes(perms, conf);
+ zkw.writeToZookeeper(tableName, serialized);
} catch (IOException ex) {
- LOG.error("Failed updating permissions mirror for '" + tableName +
- "'", ex);
+ LOG.error("Failed updating permissions mirror for '" + tableName + "'", ex);
}
}
}
@@ -247,41 +274,30 @@ void updateACL(RegionCoprocessorEnvironment e,
* the request
* @return
*/
- AuthResult permissionGranted(User user, TablePermission.Action permRequest,
+ AuthResult permissionGranted(String request, User user, TablePermission.Action permRequest,
RegionCoprocessorEnvironment e,
      Map<byte[], ? extends Collection<?>> families) {
HRegionInfo hri = e.getRegion().getRegionInfo();
- HTableDescriptor htd = e.getRegion().getTableDesc();
byte[] tableName = hri.getTableName();
// 1. All users need read access to .META. and -ROOT- tables.
// this is a very common operation, so deal with it quickly.
- if ((hri.isRootRegion() || hri.isMetaRegion()) &&
- (permRequest == TablePermission.Action.READ)) {
- return AuthResult.allow("All users allowed", user, permRequest,
- hri.getTableName());
+ if (hri.isRootRegion() || hri.isMetaRegion()) {
+ if (permRequest == TablePermission.Action.READ) {
+ return AuthResult.allow(request, "All users allowed", user, permRequest, tableName);
+ }
}
if (user == null) {
- return AuthResult.deny("No user associated with request!", null,
- permRequest, hri.getTableName());
+ return AuthResult.deny(request, "No user associated with request!", null, permRequest, tableName);
}
- // 2. The table owner has full privileges
- String owner = htd.getOwnerString();
- if (user.getShortName().equals(owner)) {
- // owner of the table has full access
- return AuthResult.allow("User is table owner", user, permRequest,
- hri.getTableName());
- }
-
- // 3. check for the table-level, if successful we can short-circuit
+ // 2. check for the table-level, if successful we can short-circuit
if (authManager.authorize(user, tableName, (byte[])null, permRequest)) {
- return AuthResult.allow("Table permission granted", user,
- permRequest, tableName);
+ return AuthResult.allow(request, "Table permission granted", user, permRequest, tableName);
}
- // 4. check permissions against the requested families
+ // 3. check permissions against the requested families
if (families != null && families.size() > 0) {
// all families must pass
      for (Map.Entry<byte[], ? extends Collection<?>> family : families.entrySet()) {
@@ -299,7 +315,7 @@ AuthResult permissionGranted(User user, TablePermission.Action permRequest,
for (byte[] qualifier : familySet) {
if (!authManager.authorize(user, tableName, family.getKey(),
qualifier, permRequest)) {
- return AuthResult.deny("Failed qualifier check", user,
+ return AuthResult.deny(request, "Failed qualifier check", user,
permRequest, tableName, family.getKey(), qualifier);
}
}
@@ -308,33 +324,40 @@ AuthResult permissionGranted(User user, TablePermission.Action permRequest,
for (KeyValue kv : kvList) {
if (!authManager.authorize(user, tableName, family.getKey(),
kv.getQualifier(), permRequest)) {
- return AuthResult.deny("Failed qualifier check", user,
+ return AuthResult.deny(request, "Failed qualifier check", user,
permRequest, tableName, family.getKey(), kv.getQualifier());
}
}
}
} else {
// no qualifiers and family-level check already failed
- return AuthResult.deny("Failed family check", user, permRequest,
+ return AuthResult.deny(request, "Failed family check", user, permRequest,
tableName, family.getKey(), null);
}
}
// all family checks passed
- return AuthResult.allow("All family checks passed", user, permRequest,
+ return AuthResult.allow(request, "All family checks passed", user, permRequest,
tableName);
}
- // 5. no families to check and table level access failed
- return AuthResult.deny("No families to check and table permission failed",
+ // 4. no families to check and table level access failed
+ return AuthResult.deny(request, "No families to check and table permission failed",
user, permRequest, tableName);
}
private void logResult(AuthResult result) {
if (AUDITLOG.isTraceEnabled()) {
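+      // Pull the caller's remote address from the request context, when available,
+      // so it can be included in the audit record.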
+ InetAddress remoteAddr = null;
+ RequestContext ctx = RequestContext.get();
+ if (ctx != null) {
+ remoteAddr = ctx.getRemoteAddress();
+ }
AUDITLOG.trace("Access " + (result.isAllowed() ? "allowed" : "denied") +
" for user " + (result.getUser() != null ? result.getUser().getShortName() : "UNKNOWN") +
"; reason: " + result.getReason() +
+ "; remote address: " + (remoteAddr != null ? remoteAddr : "") +
+ "; request: " + result.getRequest() +
"; context: " + result.toContextString());
}
}
@@ -348,23 +371,55 @@ private User getActiveUser() throws IOException {
User user = RequestContext.getRequestUser();
if (!RequestContext.isInRequestContext()) {
// for non-rpc handling, fallback to system user
- user = User.getCurrent();
+ user = userProvider.getCurrent();
}
+
return user;
}
+ /**
+ * Authorizes that the current user has any of the given permissions for the
+ * given table, column family and column qualifier.
+ * @param tableName Table requested
+ * @param family Column family requested
+ * @param qualifier Column qualifier requested
+ * @throws IOException if obtaining the current user fails
+ * @throws AccessDeniedException if user has no authorization
+ */
+ private void requirePermission(String request, byte[] tableName, byte[] family, byte[] qualifier,
+ Action... permissions) throws IOException {
+ User user = getActiveUser();
+ AuthResult result = null;
+
+ for (Action permission : permissions) {
+ if (authManager.authorize(user, tableName, family, qualifier, permission)) {
+ result = AuthResult.allow(request, "Table permission granted", user,
+ permission, tableName, family, qualifier);
+ break;
+ } else {
+ // rest of the world
+ result = AuthResult.deny(request, "Insufficient permissions", user,
+ permission, tableName, family, qualifier);
+ }
+ }
+ logResult(result);
+ if (!result.isAllowed()) {
+ throw new AccessDeniedException("Insufficient permissions " + result.toContextString());
+ }
+ }
+
/**
* Authorizes that the current user has global privileges for the given action.
* @param perm The action being requested
* @throws IOException if obtaining the current user fails
* @throws AccessDeniedException if authorization is denied
*/
- private void requirePermission(Permission.Action perm) throws IOException {
+ private void requirePermission(String request, Permission.Action perm) throws IOException {
User user = getActiveUser();
if (authManager.authorize(user, perm)) {
- logResult(AuthResult.allow("Global check allowed", user, perm, null));
+ logResult(AuthResult.allow(request, "Global check allowed", user, perm, null));
} else {
- logResult(AuthResult.deny("Global check failed", user, perm, null));
+ logResult(AuthResult.deny(request, "Global check failed", user, perm, null));
throw new AccessDeniedException("Insufficient permissions for user '" +
(user != null ? user.getShortName() : "null") +"' (global, action=" +
perm.toString() + ")");
@@ -379,7 +434,7 @@ private void requirePermission(Permission.Action perm) throws IOException {
* @param families The set of column families present/required in the request
* @throws AccessDeniedException if the authorization check failed
*/
- private void requirePermission(Permission.Action perm,
+ private void requirePermission(String request, Permission.Action perm,
      RegionCoprocessorEnvironment env, Collection<byte[]> families)
throws IOException {
// create a map of family-qualifier
@@ -387,7 +442,7 @@ private void requirePermission(Permission.Action perm,
for (byte[] family : families) {
familyMap.put(family, null);
}
- requirePermission(perm, env, familyMap);
+ requirePermission(request, perm, env, familyMap);
}
/**
@@ -398,12 +453,12 @@ private void requirePermission(Permission.Action perm,
* @param families The map of column families-qualifiers.
* @throws AccessDeniedException if the authorization check failed
*/
- private void requirePermission(Permission.Action perm,
+ public void requirePermission(String request, Permission.Action perm,
RegionCoprocessorEnvironment env,
      Map<byte[], ? extends Collection<?>> families)
throws IOException {
User user = getActiveUser();
- AuthResult result = permissionGranted(user, perm, env, families);
+ AuthResult result = permissionGranted(request, user, perm, env, families);
logResult(result);
if (!result.isAllowed()) {
@@ -467,17 +522,34 @@ private boolean hasFamilyQualifierPermission(User user,
/* ---- MasterObserver implementation ---- */
public void start(CoprocessorEnvironment env) throws IOException {
- // if running on HMaster
+
+ ZooKeeperWatcher zk = null;
if (env instanceof MasterCoprocessorEnvironment) {
- MasterCoprocessorEnvironment e = (MasterCoprocessorEnvironment)env;
- this.authManager = TableAuthManager.get(
- e.getMasterServices().getZooKeeper(),
- e.getConfiguration());
+ // if running on HMaster
+ MasterCoprocessorEnvironment mEnv = (MasterCoprocessorEnvironment) env;
+ zk = mEnv.getMasterServices().getZooKeeper();
+ } else if (env instanceof RegionServerCoprocessorEnvironment) {
+ RegionServerCoprocessorEnvironment rsEnv = (RegionServerCoprocessorEnvironment) env;
+ zk = rsEnv.getRegionServerServices().getZooKeeper();
+ } else if (env instanceof RegionCoprocessorEnvironment) {
+ // if running at region
+ regionEnv = (RegionCoprocessorEnvironment) env;
+ zk = regionEnv.getRegionServerServices().getZooKeeper();
}
- // if running at region
- if (env instanceof RegionCoprocessorEnvironment) {
- regionEnv = (RegionCoprocessorEnvironment)env;
+ // set the user provider
+ this.userProvider = UserProvider.instantiate(env.getConfiguration());
+
+ // If zk is null or IOException while obtaining auth manager,
+ // throw RuntimeException so that the coprocessor is unloaded.
+ if (zk != null) {
+ try {
+ this.authManager = TableAuthManager.get(zk, env.getConfiguration());
+ } catch (IOException ioe) {
+ throw new RuntimeException("Error obtaining TableAuthManager", ioe);
+ }
+ } else {
+ throw new RuntimeException("Error obtaining TableAuthManager, zk found null.");
}
}
@@ -488,115 +560,161 @@ public void stop(CoprocessorEnvironment env) {
@Override
public void preCreateTable(ObserverContext c,
HTableDescriptor desc, HRegionInfo[] regions) throws IOException {
- requirePermission(Permission.Action.CREATE);
-
- // default the table owner if not specified
- User owner = getActiveUser();
- if (desc.getOwnerString() == null ||
- desc.getOwnerString().equals("")) {
- desc.setOwner(owner);
- }
+ requirePermission("createTable", Permission.Action.CREATE);
}
@Override
public void postCreateTable(ObserverContext c,
- HTableDescriptor desc, HRegionInfo[] regions) throws IOException {}
+ final HTableDescriptor desc, HRegionInfo[] regions) throws IOException {
+ if (!AccessControlLists.isAclTable(desc)) {
+ final Configuration conf = c.getEnvironment().getConfiguration();
+ final String owner = (desc.getOwnerString() != null) ? desc.getOwnerString() :
+ getActiveUser().getShortName();
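+      // Grant the table's owner full rights on the newly created table; the ACL write
+      // runs as the login user (the server principal) so it is not blocked by the caller's
+      // own permissions.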
+      User.runAsLoginUser(new PrivilegedExceptionAction<Void>() {
+ @Override
+ public Void run() throws Exception {
+ UserPermission userperm = new UserPermission(Bytes.toBytes(owner), desc.getName(), null,
+ Action.values());
+ AccessControlLists.addUserPermission(conf, userperm);
+ return null;
+ }
+ });
+ }
+ }
@Override
- public void preDeleteTable(ObserverContext c,
- byte[] tableName) throws IOException {
- requirePermission(Permission.Action.CREATE);
+ public void preDeleteTable(ObserverContext c, byte[] tableName)
+ throws IOException {
+ requirePermission("deleteTable", tableName, null, null, Action.ADMIN, Action.CREATE);
}
+
@Override
public void postDeleteTable(ObserverContext c,
- byte[] tableName) throws IOException {}
-
+ final byte[] tableName) throws IOException {
+ final Configuration conf = c.getEnvironment().getConfiguration();
+    User.runAsLoginUser(new PrivilegedExceptionAction<Void>() {
+ @Override
+ public Void run() throws Exception {
+ AccessControlLists.removeTablePermissions(conf, tableName);
+ return null;
+ }
+ });
+ }
@Override
- public void preModifyTable(ObserverContext c,
- byte[] tableName, HTableDescriptor htd) throws IOException {
- requirePermission(Permission.Action.CREATE);
+ public void preModifyTable(ObserverContext c, byte[] tableName,
+ HTableDescriptor htd) throws IOException {
+ requirePermission("modifyTable", tableName, null, null, Action.ADMIN, Action.CREATE);
}
+
@Override
public void postModifyTable(ObserverContext c,
- byte[] tableName, HTableDescriptor htd) throws IOException {}
-
+ byte[] tableName, final HTableDescriptor htd) throws IOException {
+ final Configuration conf = c.getEnvironment().getConfiguration();
+ final String owner = (htd.getOwnerString() != null) ? htd.getOwnerString() :
+ getActiveUser().getShortName();
+    User.runAsLoginUser(new PrivilegedExceptionAction<Void>() {
+ @Override
+ public Void run() throws Exception {
+ UserPermission userperm = new UserPermission(Bytes.toBytes(owner), htd.getName(), null,
+ Action.values());
+ AccessControlLists.addUserPermission(conf, userperm);
+ return null;
+ }
+ });
+ }
@Override
- public void preAddColumn(ObserverContext c,
- byte[] tableName, HColumnDescriptor column) throws IOException {
- requirePermission(Permission.Action.CREATE);
+ public void preAddColumn(ObserverContext c, byte[] tableName,
+ HColumnDescriptor column) throws IOException {
+ requirePermission("addColumn", tableName, null, null, Action.ADMIN, Action.CREATE);
}
+
@Override
public void postAddColumn(ObserverContext c,
byte[] tableName, HColumnDescriptor column) throws IOException {}
-
@Override
- public void preModifyColumn(ObserverContext c,
- byte[] tableName, HColumnDescriptor descriptor) throws IOException {
- requirePermission(Permission.Action.CREATE);
+ public void preModifyColumn(ObserverContext c, byte[] tableName,
+ HColumnDescriptor descriptor) throws IOException {
+ requirePermission("modifyColumn", tableName, null, null, Action.ADMIN, Action.CREATE);
}
+
@Override
public void postModifyColumn(ObserverContext c,
byte[] tableName, HColumnDescriptor descriptor) throws IOException {}
-
@Override
- public void preDeleteColumn(ObserverContext c,
- byte[] tableName, byte[] col) throws IOException {
- requirePermission(Permission.Action.CREATE);
+ public void preDeleteColumn(ObserverContext c, byte[] tableName,
+ byte[] col) throws IOException {
+ requirePermission("deleteColumn", tableName, null, null, Action.ADMIN, Action.CREATE);
}
+
@Override
public void postDeleteColumn(ObserverContext c,
- byte[] tableName, byte[] col) throws IOException {}
-
+ final byte[] tableName, final byte[] col) throws IOException {
+ final Configuration conf = c.getEnvironment().getConfiguration();
+    User.runAsLoginUser(new PrivilegedExceptionAction<Void>() {
+ @Override
+ public Void run() throws Exception {
+ AccessControlLists.removeTablePermissions(conf, tableName, col);
+ return null;
+ }
+ });
+ this.authManager.getZKPermissionWatcher().deleteTableACLNode(tableName);
+ }
@Override
- public void preEnableTable(ObserverContext c,
- byte[] tableName) throws IOException {
- /* TODO: Allow for users with global CREATE permission and the table owner */
- requirePermission(Permission.Action.ADMIN);
+ public void preEnableTable(ObserverContext c, byte[] tableName)
+ throws IOException {
+ requirePermission("enableTable", tableName, null, null, Action.ADMIN, Action.CREATE);
}
+
@Override
public void postEnableTable(ObserverContext c,
byte[] tableName) throws IOException {}
@Override
- public void preDisableTable(ObserverContext c,
- byte[] tableName) throws IOException {
- /* TODO: Allow for users with global CREATE permission and the table owner */
- requirePermission(Permission.Action.ADMIN);
+ public void preDisableTable(ObserverContext c, byte[] tableName)
+ throws IOException {
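+    // The _acl_ table itself must never be disabled, since permission checks depend on it.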
+ if (Bytes.equals(tableName, AccessControlLists.ACL_GLOBAL_NAME)) {
+ throw new AccessDeniedException("Not allowed to disable "
+ + AccessControlLists.ACL_TABLE_NAME_STR + " table.");
+ }
+ requirePermission("disableTable", tableName, null, null, Action.ADMIN, Action.CREATE);
}
+
@Override
public void postDisableTable(ObserverContext c,
byte[] tableName) throws IOException {}
@Override
- public void preMove(ObserverContext c,
- HRegionInfo region, ServerName srcServer, ServerName destServer)
- throws IOException {
- requirePermission(Permission.Action.ADMIN);
+ public void preMove(ObserverContext c, HRegionInfo region,
+ ServerName srcServer, ServerName destServer) throws IOException {
+ requirePermission("move", region.getTableName(), null, null, Action.ADMIN);
}
+
@Override
public void postMove(ObserverContext c,
HRegionInfo region, ServerName srcServer, ServerName destServer)
throws IOException {}
@Override
- public void preAssign(ObserverContext c,
- HRegionInfo regionInfo) throws IOException {
- requirePermission(Permission.Action.ADMIN);
+ public void preAssign(ObserverContext c, HRegionInfo regionInfo)
+ throws IOException {
+ requirePermission("assign", regionInfo.getTableName(), null, null, Action.ADMIN);
}
+
@Override
public void postAssign(ObserverContext c,
HRegionInfo regionInfo) throws IOException {}
@Override
- public void preUnassign(ObserverContext c,
- HRegionInfo regionInfo, boolean force) throws IOException {
- requirePermission(Permission.Action.ADMIN);
+ public void preUnassign(ObserverContext c, HRegionInfo regionInfo,
+ boolean force) throws IOException {
+ requirePermission("unassign", regionInfo.getTableName(), null, null, Action.ADMIN);
}
+
@Override
public void postUnassign(ObserverContext c,
HRegionInfo regionInfo, boolean force) throws IOException {}
@@ -604,7 +722,7 @@ public void postUnassign(ObserverContext c,
@Override
public void preBalance(ObserverContext c)
throws IOException {
- requirePermission(Permission.Action.ADMIN);
+ requirePermission("balance", Permission.Action.ADMIN);
}
@Override
public void postBalance(ObserverContext c)
@@ -613,7 +731,7 @@ public void postBalance(ObserverContext c)
@Override
public boolean preBalanceSwitch(ObserverContext c,
boolean newValue) throws IOException {
- requirePermission(Permission.Action.ADMIN);
+ requirePermission("balanceSwitch", Permission.Action.ADMIN);
return newValue;
}
@Override
@@ -623,13 +741,13 @@ public void postBalanceSwitch(ObserverContext c,
@Override
public void preShutdown(ObserverContext c)
throws IOException {
- requirePermission(Permission.Action.ADMIN);
+ requirePermission("shutdown", Permission.Action.ADMIN);
}
@Override
public void preStopMaster(ObserverContext c)
throws IOException {
- requirePermission(Permission.Action.ADMIN);
+ requirePermission("stopMaster", Permission.Action.ADMIN);
}
@Override
@@ -639,31 +757,87 @@ public void postStartMaster(ObserverContext ctx)
AccessControlLists.init(ctx.getEnvironment().getMasterServices());
}
+ @Override
+ public void preSnapshot(final ObserverContext ctx,
+ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+ throws IOException {
+ requirePermission("snapshot", Permission.Action.ADMIN);
+ }
+
+ @Override
+ public void postSnapshot(final ObserverContext ctx,
+ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+ throws IOException {
+ }
+
+ @Override
+ public void preCloneSnapshot(final ObserverContext ctx,
+ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+ throws IOException {
+ requirePermission("cloneSnapshot", Permission.Action.ADMIN);
+ }
+
+ @Override
+ public void postCloneSnapshot(final ObserverContext ctx,
+ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+ throws IOException {
+ }
+
+ @Override
+ public void preRestoreSnapshot(final ObserverContext ctx,
+ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+ throws IOException {
+ requirePermission("restoreSnapshot", Permission.Action.ADMIN);
+ }
+
+ @Override
+ public void postRestoreSnapshot(final ObserverContext ctx,
+ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+ throws IOException {
+ }
+
+ @Override
+ public void preDeleteSnapshot(final ObserverContext ctx,
+ final SnapshotDescription snapshot) throws IOException {
+ requirePermission("deleteSnapshot", Permission.Action.ADMIN);
+ }
+
+ @Override
+ public void postDeleteSnapshot(final ObserverContext ctx,
+ final SnapshotDescription snapshot) throws IOException {
+ }
/* ---- RegionObserver implementation ---- */
@Override
- public void postOpen(ObserverContext c) {
- RegionCoprocessorEnvironment e = c.getEnvironment();
- final HRegion region = e.getRegion();
+ public void preOpen(ObserverContext e) throws IOException {
+ RegionCoprocessorEnvironment env = e.getEnvironment();
+ final HRegion region = env.getRegion();
if (region == null) {
- LOG.error("NULL region from RegionCoprocessorEnvironment in postOpen()");
+ LOG.error("NULL region from RegionCoprocessorEnvironment in preOpen()");
return;
+ } else {
+ HRegionInfo regionInfo = region.getRegionInfo();
+ if (isSpecialTable(regionInfo)) {
+ isSystemOrSuperUser(regionEnv.getConfiguration());
+ } else {
+ requirePermission("open", Action.ADMIN);
+ }
}
+ }
- try {
- this.authManager = TableAuthManager.get(
- e.getRegionServerServices().getZooKeeper(),
- e.getRegion().getConf());
- } catch (IOException ioe) {
- // pass along as a RuntimeException, so that the coprocessor is unloaded
- throw new RuntimeException("Error obtaining TableAuthManager", ioe);
+ @Override
+ public void postOpen(ObserverContext c) {
+ RegionCoprocessorEnvironment env = c.getEnvironment();
+ final HRegion region = env.getRegion();
+ if (region == null) {
+ LOG.error("NULL region from RegionCoprocessorEnvironment in postOpen()");
+ return;
}
-
if (AccessControlLists.isAclRegion(region)) {
aclRegion = true;
try {
- initialize(e);
+ initialize(env);
} catch (IOException ex) {
// if we can't obtain permissions, it's better to fail
// than perform checks incorrectly
@@ -672,11 +846,36 @@ public void postOpen(ObserverContext c) {
}
}
+ @Override
+ public void preFlush(ObserverContext e) throws IOException {
+ requirePermission("flush", getTableName(e.getEnvironment()), null, null, Action.ADMIN,
+ Action.CREATE);
+ }
+
+ @Override
+ public void preSplit(ObserverContext e) throws IOException {
+ requirePermission("split", getTableName(e.getEnvironment()), null, null, Action.ADMIN);
+ }
+
+ @Override
+ public InternalScanner preCompact(ObserverContext e,
+ final Store store, final InternalScanner scanner) throws IOException {
+ requirePermission("compact", getTableName(e.getEnvironment()), null, null, Action.ADMIN,
+ Action.CREATE);
+ return scanner;
+ }
+
+ @Override
+ public void preCompactSelection(final ObserverContext e,
+ final Store store, final List candidates) throws IOException {
+ requirePermission("compactSelection", getTableName(e.getEnvironment()), null, null, Action.ADMIN);
+ }
+
@Override
public void preGetClosestRowBefore(final ObserverContext c,
final byte [] row, final byte [] family, final Result result)
throws IOException {
- requirePermission(TablePermission.Action.READ, c.getEnvironment(),
+ requirePermission("getClosestRowBefore", TablePermission.Action.READ, c.getEnvironment(),
(family != null ? Lists.newArrayList(family) : null));
}
@@ -689,7 +888,7 @@ public void preGet(final ObserverContext c,
*/
RegionCoprocessorEnvironment e = c.getEnvironment();
User requestUser = getActiveUser();
- AuthResult authResult = permissionGranted(requestUser,
+ AuthResult authResult = permissionGranted("get", requestUser,
TablePermission.Action.READ, e, get.getFamilyMap());
if (!authResult.isAllowed()) {
if (hasFamilyQualifierPermission(requestUser,
@@ -706,7 +905,7 @@ public void preGet(final ObserverContext c,
} else {
get.setFilter(filter);
}
- logResult(AuthResult.allow("Access allowed with filter", requestUser,
+ logResult(AuthResult.allow("get", "Access allowed with filter", requestUser,
TablePermission.Action.READ, authResult.table));
} else {
logResult(authResult);
@@ -722,7 +921,7 @@ public void preGet(final ObserverContext c,
@Override
public boolean preExists(final ObserverContext c,
final Get get, final boolean exists) throws IOException {
- requirePermission(TablePermission.Action.READ, c.getEnvironment(),
+ requirePermission("exists", TablePermission.Action.READ, c.getEnvironment(),
get.familySet());
return exists;
}
@@ -731,7 +930,7 @@ public boolean preExists(final ObserverContext c,
public void prePut(final ObserverContext c,
final Put put, final WALEdit edit, final boolean writeToWAL)
throws IOException {
- requirePermission(TablePermission.Action.WRITE, c.getEnvironment(),
+ requirePermission("put", TablePermission.Action.WRITE, c.getEnvironment(),
put.getFamilyMap());
}
@@ -747,7 +946,7 @@ public void postPut(final ObserverContext c,
public void preDelete(final ObserverContext c,
final Delete delete, final WALEdit edit, final boolean writeToWAL)
throws IOException {
- requirePermission(TablePermission.Action.WRITE, c.getEnvironment(),
+ requirePermission("delete", TablePermission.Action.WRITE, c.getEnvironment(),
delete.getFamilyMap());
}
@@ -766,8 +965,9 @@ public boolean preCheckAndPut(final ObserverContext familyMap = Arrays.asList(new byte[][]{family});
+ requirePermission("checkAndPut", TablePermission.Action.READ, c.getEnvironment(), familyMap);
+ requirePermission("checkAndPut", TablePermission.Action.WRITE, c.getEnvironment(), familyMap);
return result;
}
@@ -777,8 +977,9 @@ public boolean preCheckAndDelete(final ObserverContext familyMap = Arrays.asList(new byte[][]{family});
+ requirePermission("checkAndDelete", TablePermission.Action.READ, c.getEnvironment(), familyMap);
+ requirePermission("checkAndDelete", TablePermission.Action.WRITE, c.getEnvironment(), familyMap);
return result;
}
@@ -787,16 +988,23 @@ public long preIncrementColumnValue(final ObserverContext c, Append append)
+ throws IOException {
+ requirePermission("append", TablePermission.Action.WRITE, c.getEnvironment(), append.getFamilyMap());
+ return null;
+ }
+
@Override
public Result preIncrement(final ObserverContext c,
final Increment increment)
throws IOException {
- requirePermission(TablePermission.Action.WRITE, c.getEnvironment(),
+ requirePermission("increment", TablePermission.Action.WRITE, c.getEnvironment(),
increment.getFamilyMap().keySet());
return null;
}
@@ -810,7 +1018,7 @@ public RegionScanner preScannerOpen(final ObserverContext
private void requireScannerOwner(InternalScanner s)
throws AccessDeniedException {
if (RequestContext.isInRequestContext()) {
+ String requestUserName = RequestContext.getRequestUserName();
String owner = scannerOwners.get(s);
- if (owner != null && !owner.equals(RequestContext.getRequestUserName())) {
- throw new AccessDeniedException("User '"+
- RequestContext.getRequestUserName()+"' is not the scanner owner!");
+ if (owner != null && !owner.equals(requestUserName)) {
+ throw new AccessDeniedException("User '"+ requestUserName +"' is not the scanner owner!");
+ }
+ }
+ }
+
+ /**
+ * Verifies that the requesting user has CREATE privileges on the
+ * column families involved in the bulkLoadHFile request.
+ * Per-qualifier write privileges are presently ignored.
+ */
+ @Override
+ public void preBulkLoadHFile(ObserverContext ctx,
+ List<Pair<byte[], String>> familyPaths) throws IOException {
+ List<byte[]> cfs = new LinkedList<byte[]>();
+ for (Pair<byte[], String> el : familyPaths) {
+ cfs.add(el.getFirst());
+ }
+ requirePermission("bulkLoadHFile", Permission.Action.CREATE, ctx.getEnvironment(), cfs);
+ }
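For reference, a minimal sketch (hypothetical family names and staging paths) of the familyPaths argument this hook inspects; only the first element of each pair, the column family, feeds the permission check above:

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.util.Pair;

    class FamilyPathsSketch {
      // Each pair maps a column family to the HFile that should be loaded into it.
      static List<Pair<byte[], String>> exampleFamilyPaths() {
        List<Pair<byte[], String>> familyPaths = new ArrayList<Pair<byte[], String>>();
        familyPaths.add(new Pair<byte[], String>(Bytes.toBytes("info"), "/staging/abc123/info/hfile-1"));
        familyPaths.add(new Pair<byte[], String>(Bytes.toBytes("meta"), "/staging/abc123/meta/hfile-2"));
        // preBulkLoadHFile() collects el.getFirst() from every pair and requires
        // CREATE on those families before the load may proceed.
        return familyPaths;
      }
    }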
+
+ private AuthResult hasSomeAccess(RegionCoprocessorEnvironment e, String request, Action action) throws IOException {
+ User requestUser = getActiveUser();
+ final byte[] tableName = e.getRegion().getTableDesc().getName();
+ AuthResult authResult = permissionGranted(request, requestUser,
+ action, e, Collections.EMPTY_MAP);
+ if (!authResult.isAllowed()) {
+ final Configuration conf = e.getConfiguration();
+ // hasSomeAccess is called from bulkload pre hooks
+ List<UserPermission> perms =
+ User.runAsLoginUser(new PrivilegedExceptionAction<List<UserPermission>>() {
+ @Override
+ public List<UserPermission> run() throws Exception {
+ return AccessControlLists.getUserPermissions(conf, tableName);
+ }
+ });
+ for (UserPermission userPerm: perms) {
+ for (Action userAction: userPerm.getActions()) {
+ if (userAction.equals(action)) {
+ return AuthResult.allow(request, "Access allowed", requestUser,
+ action, tableName);
+ }
+ }
}
}
+ return authResult;
+ }
+
+ /**
+ * Authorization check for
+ * SecureBulkLoadProtocol.prepareBulkLoad()
+ * @param e
+ * @throws IOException
+ */
+ public void prePrepareBulkLoad(RegionCoprocessorEnvironment e) throws IOException {
+ AuthResult authResult = hasSomeAccess(e, "prepareBulkLoad", Action.WRITE);
+ logResult(authResult);
+ if (!authResult.isAllowed()) {
+ throw new AccessDeniedException("Insufficient permissions (table=" +
+ e.getRegion().getTableDesc().getNameAsString() + ", action=WRITE)");
+ }
+ }
+
+ /**
+ * Authorization security check for
+ * SecureBulkLoadProtocol.cleanupBulkLoad()
+ * @param e
+ * @throws IOException
+ */
+ //TODO this should end up as a coprocessor hook
+ public void preCleanupBulkLoad(RegionCoprocessorEnvironment e) throws IOException {
+ AuthResult authResult = hasSomeAccess(e, "cleanupBulkLoad", Action.WRITE);
+ logResult(authResult);
+ if (!authResult.isAllowed()) {
+ throw new AccessDeniedException("Insufficient permissions (table=" +
+ e.getRegion().getTableDesc().getNameAsString() + ", action=WRITE)");
+ }
}
/* ---- AccessControllerProtocol implementation ---- */
@@ -896,73 +1178,94 @@ private void requireScannerOwner(InternalScanner s)
* This will be restricted by both client side and endpoint implementations.
*/
@Override
- public void grant(byte[] user, TablePermission permission)
- throws IOException {
+ public void grant(final UserPermission perm) throws IOException {
// verify it's only running at .acl.
if (aclRegion) {
if (LOG.isDebugEnabled()) {
- LOG.debug("Received request to grant access permission to '"
- + Bytes.toString(user) + "'. "
- + permission.toString());
+ LOG.debug("Received request to grant access permission " + perm.toString());
}
- requirePermission(Permission.Action.ADMIN);
+ requirePermission("grant", perm.getTable(), perm.getFamily(), perm.getQualifier(), Action.ADMIN);
+
+ User.runAsLoginUser(new PrivilegedExceptionAction<Void>() {
+ @Override
+ public Void run() throws Exception {
+ AccessControlLists.addUserPermission(regionEnv.getConfiguration(), perm);
+ return null;
+ }
+ });
- AccessControlLists.addTablePermission(regionEnv.getConfiguration(),
- permission.getTable(), Bytes.toString(user), permission);
if (AUDITLOG.isTraceEnabled()) {
// audit log should store permission changes in addition to auth results
- AUDITLOG.trace("Granted user '" + Bytes.toString(user) + "' permission "
- + permission.toString());
+ AUDITLOG.trace("Granted permission " + perm.toString());
}
} else {
- throw new CoprocessorException(AccessController.class, "This method " +
- "can only execute at " +
- Bytes.toString(AccessControlLists.ACL_TABLE_NAME) + " table.");
+ throw new CoprocessorException(AccessController.class, "This method "
+ + "can only execute at " + Bytes.toString(AccessControlLists.ACL_TABLE_NAME) + " table.");
}
}
@Override
- public void revoke(byte[] user, TablePermission permission)
- throws IOException{
+ @Deprecated
+ public void grant(byte[] user, TablePermission permission)
+ throws IOException {
+ grant(new UserPermission(user, permission.getTable(),
+ permission.getFamily(), permission.getQualifier(),
+ permission.getActions()));
+ }
+
+ @Override
+ public void revoke(final UserPermission perm) throws IOException {
// only allowed to be called on _acl_ region
if (aclRegion) {
if (LOG.isDebugEnabled()) {
- LOG.debug("Received request to revoke access permission for '"
- + Bytes.toString(user) + "'. "
- + permission.toString());
+ LOG.debug("Received request to revoke access permission " + perm.toString());
}
- requirePermission(Permission.Action.ADMIN);
+ requirePermission("revoke", perm.getTable(), perm.getFamily(),
+ perm.getQualifier(), Action.ADMIN);
+
+ User.runAsLoginUser(new PrivilegedExceptionAction<Void>() {
+ @Override
+ public Void run() throws Exception {
+ AccessControlLists.removeUserPermission(regionEnv.getConfiguration(), perm);
+ return null;
+ }
+ });
- AccessControlLists.removeTablePermission(regionEnv.getConfiguration(),
- permission.getTable(), Bytes.toString(user), permission);
if (AUDITLOG.isTraceEnabled()) {
// audit log should record all permission changes
- AUDITLOG.trace("Revoked user '" + Bytes.toString(user) + "' permission "
- + permission.toString());
+ AUDITLOG.trace("Revoked permission " + perm.toString());
}
} else {
- throw new CoprocessorException(AccessController.class, "This method " +
- "can only execute at " +
- Bytes.toString(AccessControlLists.ACL_TABLE_NAME) + " table.");
+ throw new CoprocessorException(AccessController.class, "This method "
+ + "can only execute at " + Bytes.toString(AccessControlLists.ACL_TABLE_NAME) + " table.");
}
}
@Override
- public List getUserPermissions(final byte[] tableName)
+ @Deprecated
+ public void revoke(byte[] user, TablePermission permission)
throws IOException {
+ revoke(new UserPermission(user, permission.getTable(),
+ permission.getFamily(), permission.getQualifier(),
+ permission.getActions()));
+ }
+
+ @Override
+ public List<UserPermission> getUserPermissions(final byte[] tableName) throws IOException {
// only allowed to be called on _acl_ region
if (aclRegion) {
- requirePermission(Permission.Action.ADMIN);
-
- List perms = AccessControlLists.getUserPermissions
- (regionEnv.getConfiguration(), tableName);
- return perms;
+ requirePermission("userPermissions", tableName, null, null, Action.ADMIN);
+ return User.runAsLoginUser(new PrivilegedExceptionAction<List<UserPermission>>() {
+ @Override
+ public List<UserPermission> run() throws Exception {
+ return AccessControlLists.getUserPermissions(regionEnv.getConfiguration(), tableName);
+ }
+ });
} else {
- throw new CoprocessorException(AccessController.class, "This method " +
- "can only execute at " +
- Bytes.toString(AccessControlLists.ACL_TABLE_NAME) + " table.");
+ throw new CoprocessorException(AccessController.class, "This method "
+ + "can only execute at " + Bytes.toString(AccessControlLists.ACL_TABLE_NAME) + " table.");
}
}
@@ -989,12 +1292,12 @@ public void checkPermissions(Permission[] permissions) throws IOException {
}
}
- requirePermission(action, regionEnv, familyMap);
+ requirePermission("checkPermissions", action, regionEnv, familyMap);
}
} else {
for (Permission.Action action : permission.getActions()) {
- requirePermission(action);
+ requirePermission("checkPermissions", action);
}
}
}
@@ -1027,4 +1330,90 @@ private byte[] getTableName(RegionCoprocessorEnvironment e) {
}
return tableName;
}
+
+
+ @Override
+ public void preClose(ObserverContext e, boolean abortRequested)
+ throws IOException {
+ requirePermission("close", Permission.Action.ADMIN);
+ }
+
+ @Override
+ public void preLockRow(ObserverContext ctx, byte[] regionName,
+ byte[] row) throws IOException {
+ requirePermission("lockRow", getTableName(ctx.getEnvironment()), null, null,
+ Permission.Action.WRITE, Permission.Action.CREATE);
+ }
+
+ @Override
+ public void preUnlockRow(ObserverContext ctx, byte[] regionName,
+ long lockId) throws IOException {
+ requirePermission("unlockRow", getTableName(ctx.getEnvironment()), null, null,
+ Permission.Action.WRITE, Permission.Action.CREATE);
+ }
+
+ private void isSystemOrSuperUser(Configuration conf) throws IOException {
+ User user = userProvider.getCurrent();
+ if (user == null) {
+ throw new IOException("Unable to obtain the current user, "
+ + "authorization checks for internal operations will not work correctly!");
+ }
+
+ String currentUser = user.getShortName();
+ List<String> superusers = Lists.asList(currentUser,
+ conf.getStrings(AccessControlLists.SUPERUSER_CONF_KEY, new String[0]));
+
+ User activeUser = getActiveUser();
+ if (!(superusers.contains(activeUser.getShortName()))) {
+ throw new AccessDeniedException("User '" + activeUser.getShortName()
+ + "' is not a system or super user.");
+ }
+ }
+
+ private boolean isSpecialTable(HRegionInfo regionInfo) {
+ byte[] tableName = regionInfo.getTableName();
+ return Bytes.equals(tableName, AccessControlLists.ACL_TABLE_NAME)
+ || Bytes.equals(tableName, Bytes.toBytes("-ROOT-"))
+ || Bytes.equals(tableName, Bytes.toBytes(".META."));
+ }
+
+ @Override
+ public void preStopRegionServer(ObserverContext env)
+ throws IOException {
+ requirePermission("stop", Permission.Action.ADMIN);
+ }
+
+ @Override
+ public void preGetTableDescriptors(ObserverContext ctx,
+ List<String> tableNamesList, List<HTableDescriptor> descriptors) throws IOException {
+ // If the list is empty, this is a request for all table descriptors and requires GLOBAL
+ // ADMIN privs.
+ if (tableNamesList == null || tableNamesList.isEmpty()) {
+ requirePermission("getTableDescriptors", Permission.Action.ADMIN);
+ }
+ // Otherwise, if the requestor has ADMIN or CREATE privs for all listed tables, the
+ // request can be granted.
+ else {
+ MasterServices masterServices = ctx.getEnvironment().getMasterServices();
+ for (String tableName: tableNamesList) {
+ // Do not deny if the table does not exist
+ byte[] nameAsBytes = Bytes.toBytes(tableName);
+ try {
+ masterServices.checkTableModifiable(nameAsBytes);
+ } catch (TableNotFoundException ex) {
+ // Skip checks for a table that does not exist
+ continue;
+ } catch (TableNotDisabledException ex) {
+ // We don't care about this
+ }
+ requirePermission("getTableDescriptors", nameAsBytes, null, null,
+ Permission.Action.ADMIN, Permission.Action.CREATE);
+ }
+ }
+ }
+
+ @Override
+ public void postGetTableDescriptors(ObserverContext ctx,
+ List<HTableDescriptor> descriptors) throws IOException {
+ }
}
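As a rough client-side illustration of the named checks above — a sketch only, assuming a hypothetical table "mytable" with an "info" family and the 0.94 HTable.coprocessorProxy() API — checkPermissions() can be used to ask the AccessController whether the caller holds a permission before attempting the operation:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.security.access.AccessControllerProtocol;
    import org.apache.hadoop.hbase.security.access.Permission;
    import org.apache.hadoop.hbase.security.access.TablePermission;
    import org.apache.hadoop.hbase.util.Bytes;

    public class CheckPermissionsSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "mytable");
        try {
          AccessControllerProtocol acl =
              table.coprocessorProxy(AccessControllerProtocol.class, HConstants.EMPTY_START_ROW);
          // Throws AccessDeniedException if the caller lacks READ on mytable:info.
          acl.checkPermissions(new Permission[] {
              new TablePermission(Bytes.toBytes("mytable"), Bytes.toBytes("info"),
                  Permission.Action.READ) });
        } finally {
          table.close();
        }
      }
    }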
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessControllerProtocol.java b/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessControllerProtocol.java
index 78cca4f88df4..2ecb60a94bff 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessControllerProtocol.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/access/AccessControllerProtocol.java
@@ -28,8 +28,15 @@
*/
public interface AccessControllerProtocol extends CoprocessorProtocol {
- /* V2: Added {@link #checkPermissions(Permission...)}) */
- public static final long VERSION = 2L;
+ public static final long VERSION = 1L;
+
+ /**
+ * Grants the given user or group the privilege to perform the given actions
+ * @param userPermission the details of the provided user permissions
+ * @throws IOException if the grant could not be applied
+ */
+ public void grant(UserPermission userPermission)
+ throws IOException;
/**
* Grants the given user or group the privilege to perform the given actions
@@ -38,10 +45,26 @@ public interface AccessControllerProtocol extends CoprocessorProtocol {
* the grant
* @param permission the details of the provided permissions
* @throws IOException if the grant could not be applied
+ * @deprecated Use {@link #grant(UserPermission userPermission)} instead
*/
+ @Deprecated
public void grant(byte[] user, TablePermission permission)
throws IOException;
+ /**
+ * Revokes a previously granted privilege from a user or group.
+ * Note that the provided {@link TablePermission} details must exactly match
+ * a stored grant. For example, if user "bob" has been granted "READ" access
+ * to table "data", over column family and qualifer "info:colA", then the
+ * table, column family and column qualifier must all be specified.
+ * Attempting to revoke permissions over just the "data" table will have
+ * no effect.
+ * @param permission the details of the previously granted permission to revoke
+ * @throws IOException if the revocation could not be performed
+ */
+ public void revoke(UserPermission userPermission)
+ throws IOException;
+
/**
* Revokes a previously granted privilege from a user or group.
* Note that the provided {@link TablePermission} details must exactly match
@@ -54,7 +77,9 @@ public void grant(byte[] user, TablePermission permission)
* privileges are being revoked
* @param permission the details of the previously granted permission to revoke
* @throws IOException if the revocation could not be performed
+ * @deprecated Use {@link #revoke(UserPermission userPermission)} instead
*/
+ @Deprecated
public void revoke(byte[] user, TablePermission permission)
throws IOException;
@@ -82,5 +107,4 @@ public List getUserPermissions(byte[] tableName)
*/
public void checkPermissions(Permission[] permissions)
throws IOException;
-
}
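To make the exact-match revoke semantics documented above concrete, here is a hedged sketch using the user "bob" and table "data" from the Javadoc example; it assumes the 0.94 coprocessorProxy() API and that grant()/revoke() are invoked through the _acl_ table, as the endpoint requires:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.security.access.AccessControllerProtocol;
    import org.apache.hadoop.hbase.security.access.Permission;
    import org.apache.hadoop.hbase.security.access.UserPermission;
    import org.apache.hadoop.hbase.util.Bytes;

    public class GrantRevokeSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable acl = new HTable(conf, "_acl_");   // grant()/revoke() only run on the ACL region
        try {
          AccessControllerProtocol protocol =
              acl.coprocessorProxy(AccessControllerProtocol.class, Bytes.toBytes("data"));
          UserPermission readInfoColA = new UserPermission(Bytes.toBytes("bob"),
              Bytes.toBytes("data"), Bytes.toBytes("info"), Bytes.toBytes("colA"),
              Permission.Action.READ);
          protocol.grant(readInfoColA);
          // Revoking must name the exact stored grant; a table-only UserPermission on
          // "data" would not remove the family/qualifier-scoped grant made above.
          protocol.revoke(readInfoColA);
        } finally {
          acl.close();
        }
      }
    }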
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/access/SecureBulkLoadEndpoint.java b/security/src/main/java/org/apache/hadoop/hbase/security/access/SecureBulkLoadEndpoint.java
new file mode 100644
index 000000000000..1a93c47295b6
--- /dev/null
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/access/SecureBulkLoadEndpoint.java
@@ -0,0 +1,328 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.security.access;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.client.UserProvider;
+import org.apache.hadoop.hbase.coprocessor.BaseEndpointCoprocessor;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.ipc.RequestContext;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Methods;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.Token;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.security.PrivilegedAction;
+import java.security.SecureRandom;
+import java.util.List;
+
+/**
+ * Coprocessor service for bulk loads in secure mode.
+ * This coprocessor has to be installed as part of enabling
+ * security in HBase.
+ *
+ * This service addresses two issues:
+ *
+ * 1. Moving files in a secure filesystem wherein the HBase Client
+ * and HBase Server are different filesystem users.
+ * 2. Moving the files in a secure manner, assuming that the
+ * filesystem is POSIX compliant.
+ *
+ * The algorithm is as follows:
+ *
+ * 1. Create an hbase owned staging directory which is
+ * world traversable (711): /hbase/staging
+ * 2. A user writes out data to his secure output directory: /user/foo/data
+ * 3. A call is made to hbase to create a secret staging directory
+ * which is globally rwx (777): /user/staging/averylongandrandomdirectoryname
+ * 4. The user makes the data world readable and writable, then moves it
+ * into the random staging directory, then calls bulkLoadHFiles()
+ *
+ * Like delegation tokens, the strength of the security lies in the length
+ * and randomness of the secret directory.
+ *
+ */
+@InterfaceAudience.Private
+public class SecureBulkLoadEndpoint extends BaseEndpointCoprocessor
+ implements SecureBulkLoadProtocol {
+
+ public static final long VERSION = 0L;
+
+ //Random number is 320 bits wide
+ private static final int RANDOM_WIDTH = 320;
+ //We picked 32 as the radix, so the character set
+ //will only contain alphanumeric values
+ //320/5 = 64 characters
+ private static final int RANDOM_RADIX = 32;
+
+ private static Log LOG = LogFactory.getLog(SecureBulkLoadEndpoint.class);
+
+ private final static FsPermission PERM_ALL_ACCESS = FsPermission.valueOf("-rwxrwxrwx");
+ private final static FsPermission PERM_HIDDEN = FsPermission.valueOf("-rwx--x--x");
+ private final static String BULKLOAD_STAGING_DIR = "hbase.bulkload.staging.dir";
+
+ private SecureRandom random;
+ private FileSystem fs;
+ private Configuration conf;
+
+ //two levels so it doesn't get deleted accidentally
+ //no sticky bit in Hadoop 1.0
+ private Path baseStagingDir;
+
+ private RegionCoprocessorEnvironment env;
+
+ private UserProvider provider;
+
+ @Override
+ public void start(CoprocessorEnvironment env) {
+ super.start(env);
+
+ this.env = (RegionCoprocessorEnvironment)env;
+ random = new SecureRandom();
+ conf = env.getConfiguration();
+ baseStagingDir = getBaseStagingDir(conf);
+ this.provider = UserProvider.instantiate(conf);
+
+ try {
+ fs = FileSystem.get(conf);
+ fs.mkdirs(baseStagingDir, PERM_HIDDEN);
+ fs.setPermission(baseStagingDir, PERM_HIDDEN);
+ //no sticky bit in hadoop-1.0, making directory nonempty so it never gets erased
+ fs.mkdirs(new Path(baseStagingDir,"DONOTERASE"), PERM_HIDDEN);
+ FileStatus status = fs.getFileStatus(baseStagingDir);
+ if(status == null) {
+ throw new IllegalStateException("Failed to create staging directory");
+ }
+ if(!status.getPermission().equals(PERM_HIDDEN)) {
+ throw new IllegalStateException("Directory already exists but permissions aren't set to '-rwx--x--x' ");
+ }
+ } catch (IOException e) {
+ throw new IllegalStateException("Failed to get FileSystem instance",e);
+ }
+ }
+
+ @Override
+ public String prepareBulkLoad(byte[] tableName) throws IOException {
+ getAccessController().prePrepareBulkLoad(env);
+ return createStagingDir(baseStagingDir, getActiveUser(), tableName).toString();
+ }
+
+ @Override
+ public void cleanupBulkLoad(String bulkToken) throws IOException {
+ getAccessController().preCleanupBulkLoad(env);
+ fs.delete(createStagingDir(baseStagingDir,
+ getActiveUser(),
+ env.getRegion().getTableDesc().getName(),
+ new Path(bulkToken).getName()),
+ true);
+ }
+
+ @Override
+ public boolean bulkLoadHFiles(final List<Pair<byte[], String>> familyPaths,
+ final Token<?> userToken, final String bulkToken, boolean assignSeqNum) throws IOException {
+ User user = getActiveUser();
+ final UserGroupInformation ugi = user.getUGI();
+ if(userToken != null) {
+ ugi.addToken(userToken);
+ } else if (provider.isHadoopSecurityEnabled()) {
+ //we allow this to pass through in "simple" security mode
+ //for mini cluster testing
+ throw new DoNotRetryIOException("User token cannot be null");
+ }
+
+ HRegion region = env.getRegion();
+ boolean bypass = false;
+ if (region.getCoprocessorHost() != null) {
+ bypass = region.getCoprocessorHost().preBulkLoadHFile(familyPaths);
+ }
+ boolean loaded = false;
+ final IOException[] es = new IOException[1];
+ if (!bypass) {
+ loaded = ugi.doAs(new PrivilegedAction<Boolean>() {
+ @Override
+ public Boolean run() {
+ FileSystem fs = null;
+ try {
+ Configuration conf = env.getConfiguration();
+ fs = FileSystem.get(conf);
+ for (Pair<byte[], String> el : familyPaths) {
+ Path p = new Path(el.getSecond());
+ LOG.debug("Setting permission for: " + p);
+ fs.setPermission(p, PERM_ALL_ACCESS);
+ Path stageFamily = new Path(bulkToken, Bytes.toString(el.getFirst()));
+ if(!fs.exists(stageFamily)) {
+ fs.mkdirs(stageFamily);
+ fs.setPermission(stageFamily, PERM_ALL_ACCESS);
+ }
+ }
+ //We call bulkLoadHFiles as requesting user
+ //To enable access prior to staging
+ return env.getRegion().bulkLoadHFiles(familyPaths,
+ new SecureBulkLoadListener(fs, bulkToken));
+ }
+ catch(DoNotRetryIOException e){
+ es[0] = e;
+ }
+ catch (Exception e) {
+ LOG.error("Failed to complete bulk load", e);
+ }
+ return false;
+ }
+ });
+ }
+
+ if (es[0] != null) {
+ throw es[0];
+ }
+
+ if (region.getCoprocessorHost() != null) {
+ loaded = region.getCoprocessorHost().postBulkLoadHFile(familyPaths, loaded);
+ }
+ return loaded;
+ }
+
+ @Override
+ public long getProtocolVersion(String protocol, long clientVersion)
+ throws IOException {
+ if (SecureBulkLoadProtocol.class.getName().equals(protocol)) {
+ return SecureBulkLoadEndpoint.VERSION;
+ }
+ LOG.warn("Unknown protocol requested: " + protocol);
+ return -1;
+ }
+
+ private AccessController getAccessController() {
+ return (AccessController) this.env.getRegion()
+ .getCoprocessorHost().findCoprocessor(AccessController.class.getName());
+ }
+
+ private Path createStagingDir(Path baseDir, User user, byte[] tableName) throws IOException {
+ String randomDir = user.getShortName()+"__"+Bytes.toString(tableName)+"__"+
+ (new BigInteger(RANDOM_WIDTH, random).toString(RANDOM_RADIX));
+ return createStagingDir(baseDir, user, tableName, randomDir);
+ }
+
+ private Path createStagingDir(Path baseDir,
+ User user,
+ byte[] tableName,
+ String randomDir) throws IOException {
+ Path p = new Path(baseDir, randomDir);
+ fs.mkdirs(p, PERM_ALL_ACCESS);
+ fs.setPermission(p, PERM_ALL_ACCESS);
+ return p;
+ }
+
+ private User getActiveUser() throws IOException {
+ User user = RequestContext.getRequestUser();
+ if (!RequestContext.isInRequestContext()) {
+ throw new DoNotRetryIOException("Failed to get requesting user");
+ }
+
+ //this is for testing
+ if("simple".equalsIgnoreCase(conf.get(User.HBASE_SECURITY_CONF_KEY))) {
+ return User.createUserForTesting(conf, user.getShortName(), new String[]{});
+ }
+
+ return user;
+ }
+
+ /**
+ * This returns the staging path for a given column family.
+ * This is needed for clean recovery and called reflectively in LoadIncrementalHFiles
+ */
+ public static Path getStagingPath(Configuration conf, String bulkToken, byte[] family) {
+ Path stageP = new Path(getBaseStagingDir(conf), bulkToken);
+ return new Path(stageP, Bytes.toString(family));
+ }
+
+ private static Path getBaseStagingDir(Configuration conf) {
+ return new Path(conf.get(BULKLOAD_STAGING_DIR, "/tmp/hbase-staging"));
+ }
+
+ private static class SecureBulkLoadListener implements HRegion.BulkLoadListener {
+ private FileSystem fs;
+ private String stagingDir;
+
+ public SecureBulkLoadListener(FileSystem fs, String stagingDir) {
+ this.fs = fs;
+ this.stagingDir = stagingDir;
+ }
+
+ @Override
+ public String prepareBulkLoad(final byte[] family, final String srcPath) throws IOException {
+ Path p = new Path(srcPath);
+ Path stageP = new Path(stagingDir, new Path(Bytes.toString(family), p.getName()));
+
+ if(!isFile(p)) {
+ throw new IOException("Path does not reference a file: " + p);
+ }
+
+ LOG.debug("Moving " + p + " to " + stageP);
+ if(!fs.rename(p, stageP)) {
+ throw new IOException("Failed to move HFile: " + p + " to " + stageP);
+ }
+ return stageP.toString();
+ }
+
+ @Override
+ public void doneBulkLoad(byte[] family, String srcPath) throws IOException {
+ LOG.debug("Bulk Load done for: " + srcPath);
+ }
+
+ @Override
+ public void failedBulkLoad(final byte[] family, final String srcPath) throws IOException {
+ Path p = new Path(srcPath);
+ Path stageP = new Path(stagingDir,
+ new Path(Bytes.toString(family), p.getName()));
+ LOG.debug("Moving " + stageP + " back to " + p);
+ if(!fs.rename(stageP, p))
+ throw new IOException("Failed to move HFile: " + stageP + " to " + p);
+ }
+
+ /**
+ * Check if the path is referencing a file.
+ * This is mainly needed to avoid symlinks.
+ * @param p
+ * @return true if the p is a file
+ * @throws IOException
+ */
+ private boolean isFile(Path p) throws IOException {
+ FileStatus status = fs.getFileStatus(p);
+ boolean isFile = !status.isDir();
+ try {
+ isFile = isFile && !(Boolean)Methods.call(FileStatus.class, status, "isSymlink", null, null);
+ } catch (Exception e) {
+ }
+ return isFile;
+ }
+ }
+}
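To make the randomness claim in the class Javadoc concrete, here is a small standalone sketch of how a staging directory name of the same shape is formed: 320 bits from SecureRandom rendered in radix 32 give roughly 64 alphanumeric characters (the user and table prefix shown here are hypothetical):

    import java.math.BigInteger;
    import java.security.SecureRandom;

    public class StagingNameSketch {
      public static void main(String[] args) {
        SecureRandom random = new SecureRandom();
        // 320 random bits printed in base 32 (digits plus lowercase letters)
        // yield about 320 / 5 = 64 characters for the secret directory name.
        String secret = new BigInteger(320, random).toString(32);
        System.out.println("/tmp/hbase-staging/bob__mytable__" + secret);
      }
    }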
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/access/SecureBulkLoadProtocol.java b/security/src/main/java/org/apache/hadoop/hbase/security/access/SecureBulkLoadProtocol.java
new file mode 100644
index 000000000000..63f45fd3f35c
--- /dev/null
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/access/SecureBulkLoadProtocol.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.security.access;
+
+import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
+import org.apache.hadoop.hbase.security.TokenInfo;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.security.token.Token;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Provides a secure way to bulk load data into HBase.
+ * This is an internal API; bulk loads should be initiated
+ * via {@link org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles}
+ * with security enabled.
+ */
+@TokenInfo("HBASE_AUTH_TOKEN")
+public interface SecureBulkLoadProtocol extends CoprocessorProtocol {
+
+ /**
+ * Prepare for bulk load.
+ * Will be called before bulkLoadHFiles()
+ * @param tableName
+ * @return a bulkToken which uniquely identifies the bulk session
+ * @throws IOException
+ */
+ String prepareBulkLoad(byte[] tableName) throws IOException;
+
+ /**
+ * Cleanup after bulk load.
+ * Will be called after bulkLoadHFiles().
+ * @param bulkToken
+ * @throws IOException
+ */
+ void cleanupBulkLoad(String bulkToken) throws IOException;
+
+ /**
+ * Secure version of HRegionServer.bulkLoadHFiles().
+ * @param familyPaths column family to HFile path pairs
+ * @param userToken requesting user's HDFS delegation token
+ * @param bulkToken
+ * @param assignSeqNum
+ * @return true if all HFiles were loaded successfully
+ * @throws IOException
+ */
+ boolean bulkLoadHFiles(List<Pair<byte[], String>> familyPaths,
+ Token<?> userToken, String bulkToken, boolean assignSeqNum) throws IOException;
+
+}
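A sketch of the intended call sequence for this protocol, assuming the caller has already prepared its HFiles and holds an HDFS delegation token; in practice LoadIncrementalHFiles drives these steps when security is enabled:

    import java.util.List;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.security.access.SecureBulkLoadProtocol;
    import org.apache.hadoop.hbase.util.Pair;
    import org.apache.hadoop.security.token.Token;

    public class SecureBulkLoadFlowSketch {
      // familyPaths and userToken are prepared elsewhere (e.g. by LoadIncrementalHFiles).
      static void load(HTable table, List<Pair<byte[], String>> familyPaths,
          Token<?> userToken) throws Exception {
        SecureBulkLoadProtocol proxy =
            table.coprocessorProxy(SecureBulkLoadProtocol.class, HConstants.EMPTY_START_ROW);
        String bulkToken = proxy.prepareBulkLoad(table.getTableName()); // secret staging dir
        try {
          // The caller moves its (world readable/writable) HFiles into the staging
          // directory named by bulkToken, then asks the region server to load them.
          proxy.bulkLoadHFiles(familyPaths, userToken, bulkToken, true);
        } finally {
          proxy.cleanupBulkLoad(bulkToken);  // remove the staging directory
        }
      }
    }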
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/access/TableAuthManager.java b/security/src/main/java/org/apache/hadoop/hbase/security/access/TableAuthManager.java
index 2c3870f932e6..3e3257cd6e72 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/security/access/TableAuthManager.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/access/TableAuthManager.java
@@ -25,6 +25,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
@@ -40,22 +41,59 @@
* Performs authorization checks for a given user's assigned permissions
*/
public class TableAuthManager {
- /** Key for the user and group cache maps for globally assigned permissions */
- private static final String GLOBAL_CACHE_KEY = ".access.";
+ private static class PermissionCache<T extends Permission> {
+ /** Cache of user permissions */
+ private ListMultimap<String, T> userCache = ArrayListMultimap.create();
+ /** Cache of group permissions */
+ private ListMultimap<String, T> groupCache = ArrayListMultimap.create();
+
+ public List<T> getUser(String user) {
+ return userCache.get(user);
+ }
+
+ public void putUser(String user, T perm) {
+ userCache.put(user, perm);
+ }
+
+ public List<T> replaceUser(String user, Iterable<? extends T> perms) {
+ return userCache.replaceValues(user, perms);
+ }
+
+ public List<T> getGroup(String group) {
+ return groupCache.get(group);
+ }
+
+ public void putGroup(String group, T perm) {
+ groupCache.put(group, perm);
+ }
+
+ public List<T> replaceGroup(String group, Iterable<? extends T> perms) {
+ return groupCache.replaceValues(group, perms);
+ }
+
+ /**
+ * Returns a combined map of user and group permissions, with group names prefixed by
+ * {@link AccessControlLists#GROUP_PREFIX}.
+ */
+ public ListMultimap<String, T> getAllPermissions() {
+ ListMultimap<String, T> tmp = ArrayListMultimap.create();
+ tmp.putAll(userCache);
+ for (String group : groupCache.keySet()) {
+ tmp.putAll(AccessControlLists.GROUP_PREFIX + group, groupCache.get(group));
+ }
+ return tmp;
+ }
+ }
+
private static Log LOG = LogFactory.getLog(TableAuthManager.class);
private static TableAuthManager instance;
- /** Cache of global user permissions */
- private ListMultimap USER_CACHE = ArrayListMultimap.create();
- /** Cache of global group permissions */
- private ListMultimap GROUP_CACHE = ArrayListMultimap.create();
-
- private ConcurrentSkipListMap> TABLE_USER_CACHE =
- new ConcurrentSkipListMap>(Bytes.BYTES_COMPARATOR);
+ /** Cache of global permissions */
+ private volatile PermissionCache<Permission> globalCache;
- private ConcurrentSkipListMap> TABLE_GROUP_CACHE =
- new ConcurrentSkipListMap>(Bytes.BYTES_COMPARATOR);
+ private ConcurrentSkipListMap<byte[], PermissionCache<TablePermission>> tableCache =
+ new ConcurrentSkipListMap<byte[], PermissionCache<TablePermission>>(Bytes.BYTES_COMPARATOR);
private Configuration conf;
private ZKPermissionWatcher zkperms;
@@ -63,23 +101,30 @@ public class TableAuthManager {
private TableAuthManager(ZooKeeperWatcher watcher, Configuration conf)
throws IOException {
this.conf = conf;
+
+ // initialize global permissions based on configuration
+ globalCache = initGlobal(conf);
+
this.zkperms = new ZKPermissionWatcher(watcher, this, conf);
try {
this.zkperms.start();
} catch (KeeperException ke) {
LOG.error("ZooKeeper initialization failed", ke);
}
-
- // initialize global permissions based on configuration
- initGlobal(conf);
}
- private void initGlobal(Configuration conf) throws IOException {
- User user = User.getCurrent();
+ /**
+ * Returns a new {@code PermissionCache} initialized with permission assignments
+ * from the {@code hbase.superuser} configuration key.
+ */
+ private PermissionCache<Permission> initGlobal(Configuration conf) throws IOException {
+ UserProvider userProvider = UserProvider.instantiate(conf);
+ User user = userProvider.getCurrent();
if (user == null) {
throw new IOException("Unable to obtain the current user, " +
"authorization checks for internal operations will not work correctly!");
}
+ PermissionCache<Permission> newCache = new PermissionCache<Permission>();
String currentUser = user.getShortName();
// the system user is always included
@@ -88,13 +133,14 @@ private void initGlobal(Configuration conf) throws IOException {
if (superusers != null) {
for (String name : superusers) {
if (AccessControlLists.isGroupPrincipal(name)) {
- GROUP_CACHE.put(AccessControlLists.getGroupName(name),
+ newCache.putGroup(AccessControlLists.getGroupName(name),
new Permission(Permission.Action.values()));
} else {
- USER_CACHE.put(name, new Permission(Permission.Action.values()));
+ newCache.putUser(name, new Permission(Permission.Action.values()));
}
}
}
+ return newCache;
}
public ZKPermissionWatcher getZKPermissionWatcher() {
@@ -103,60 +149,71 @@ public ZKPermissionWatcher getZKPermissionWatcher() {
public void refreshCacheFromWritable(byte[] table, byte[] data) throws IOException {
if (data != null && data.length > 0) {
- DataInput in = new DataInputStream( new ByteArrayInputStream(data) );
+ DataInput in = new DataInputStream(new ByteArrayInputStream(data));
ListMultimap<String, TablePermission> perms = AccessControlLists.readPermissions(in, conf);
- cache(table, perms);
+ if (perms != null) {
+ if (Bytes.equals(table, AccessControlLists.ACL_GLOBAL_NAME)) {
+ updateGlobalCache(perms);
+ } else {
+ updateTableCache(table, perms);
+ }
+ }
} else {
LOG.debug("Skipping permission cache refresh because writable data is empty");
}
}
/**
- * Updates the internal permissions cache for a single table, splitting
- * the permissions listed into separate caches for users and groups to optimize
- * group lookups.
- *
- * @param table
- * @param tablePerms
+ * Updates the internal global permissions cache
+ *
+ * @param userPerms
*/
- private void cache(byte[] table,
- ListMultimap tablePerms) {
- // split user from group assignments so we don't have to prepend the group
- // prefix every time we query for groups
- ListMultimap userPerms = ArrayListMultimap.create();
- ListMultimap groupPerms = ArrayListMultimap.create();
-
- if (tablePerms != null) {
- for (Map.Entry entry : tablePerms.entries()) {
+ private void updateGlobalCache(ListMultimap<String, TablePermission> userPerms) {
+ PermissionCache<Permission> newCache = null;
+ try {
+ newCache = initGlobal(conf);
+ for (Map.Entry<String, TablePermission> entry : userPerms.entries()) {
if (AccessControlLists.isGroupPrincipal(entry.getKey())) {
- groupPerms.put(
- entry.getKey().substring(AccessControlLists.GROUP_PREFIX.length()),
- entry.getValue());
+ newCache.putGroup(AccessControlLists.getGroupName(entry.getKey()),
+ new Permission(entry.getValue().getActions()));
} else {
- userPerms.put(entry.getKey(), entry.getValue());
+ newCache.putUser(entry.getKey(), new Permission(entry.getValue().getActions()));
}
}
- TABLE_GROUP_CACHE.put(table, groupPerms);
- TABLE_USER_CACHE.put(table, userPerms);
+ globalCache = newCache;
+ } catch (IOException e) {
+ // Never happens
+ LOG.error("Error occured while updating the global cache", e);
}
}
- private List getUserPermissions(String username, byte[] table) {
- ListMultimap tablePerms = TABLE_USER_CACHE.get(table);
- if (tablePerms != null) {
- return tablePerms.get(username);
+ /**
+ * Updates the internal permissions cache for a single table, splitting
+ * the permissions listed into separate caches for users and groups to optimize
+ * group lookups.
+ *
+ * @param table
+ * @param tablePerms
+ */
+ private void updateTableCache(byte[] table, ListMultimap<String, TablePermission> tablePerms) {
+ PermissionCache<TablePermission> newTablePerms = new PermissionCache<TablePermission>();
+
+ for (Map.Entry<String, TablePermission> entry : tablePerms.entries()) {
+ if (AccessControlLists.isGroupPrincipal(entry.getKey())) {
+ newTablePerms.putGroup(AccessControlLists.getGroupName(entry.getKey()), entry.getValue());
+ } else {
+ newTablePerms.putUser(entry.getKey(), entry.getValue());
+ }
}
- return null;
+ tableCache.put(table, newTablePerms);
}
- private List getGroupPermissions(String groupName, byte[] table) {
- ListMultimap tablePerms = TABLE_GROUP_CACHE.get(table);
- if (tablePerms != null) {
- return tablePerms.get(groupName);
+ private PermissionCache<TablePermission> getTablePermissions(byte[] table) {
+ if (!tableCache.containsKey(table)) {
+ tableCache.putIfAbsent(table, new PermissionCache<TablePermission>());
}
-
- return null;
+ return tableCache.get(table);
}
/**
@@ -191,14 +248,14 @@ public boolean authorize(User user, Permission.Action action) {
return false;
}
- if (authorize(USER_CACHE.get(user.getShortName()), action)) {
+ if (authorize(globalCache.getUser(user.getShortName()), action)) {
return true;
}
String[] groups = user.getGroupNames();
if (groups != null) {
for (String group : groups) {
- if (authorize(GROUP_CACHE.get(group), action)) {
+ if (authorize(globalCache.getGroup(group), action)) {
return true;
}
}
@@ -227,18 +284,20 @@ private boolean authorize(List perms, byte[] table, byte[] fami
public boolean authorize(User user, byte[] table, KeyValue kv,
TablePermission.Action action) {
- List userPerms = getUserPermissions(
- user.getShortName(), table);
- if (authorize(userPerms, table, kv, action)) {
- return true;
- }
+ PermissionCache<TablePermission> tablePerms = tableCache.get(table);
+ if (tablePerms != null) {
+ List<TablePermission> userPerms = tablePerms.getUser(user.getShortName());
+ if (authorize(userPerms, table, kv, action)) {
+ return true;
+ }
- String[] groupNames = user.getGroupNames();
- if (groupNames != null) {
- for (String group : groupNames) {
- List groupPerms = getGroupPermissions(group, table);
- if (authorize(groupPerms, table, kv, action)) {
- return true;
+ String[] groupNames = user.getGroupNames();
+ if (groupNames != null) {
+ for (String group : groupNames) {
+ List<TablePermission> groupPerms = tablePerms.getGroup(group);
+ if (authorize(groupPerms, table, kv, action)) {
+ return true;
+ }
}
}
}
@@ -267,7 +326,7 @@ private boolean authorize(List perms, byte[] table, KeyValue kv
* stored user permissions.
*/
public boolean authorizeUser(String username, Permission.Action action) {
- return authorize(USER_CACHE.get(username), action);
+ return authorize(globalCache.getUser(username), action);
}
/**
@@ -291,7 +350,7 @@ public boolean authorizeUser(String username, byte[] table, byte[] family,
if (authorizeUser(username, action)) {
return true;
}
- return authorize(getUserPermissions(username, table), table, family,
+ return authorize(getTablePermissions(table).getUser(username), table, family,
qualifier, action);
}
@@ -301,7 +360,7 @@ public boolean authorizeUser(String username, byte[] table, byte[] family,
* permissions.
*/
public boolean authorizeGroup(String groupName, Permission.Action action) {
- return authorize(GROUP_CACHE.get(groupName), action);
+ return authorize(globalCache.getGroup(groupName), action);
}
/**
@@ -319,7 +378,7 @@ public boolean authorizeGroup(String groupName, byte[] table, byte[] family,
if (authorizeGroup(groupName, action)) {
return true;
}
- return authorize(getGroupPermissions(groupName, table), table, family, action);
+ return authorize(getTablePermissions(table).getGroup(groupName), table, family, action);
}
public boolean authorize(User user, byte[] table, byte[] family,
@@ -352,24 +411,26 @@ public boolean authorize(User user, byte[] table, byte[] family,
*/
public boolean matchPermission(User user,
byte[] table, byte[] family, TablePermission.Action action) {
- List userPerms = getUserPermissions(
- user.getShortName(), table);
- if (userPerms != null) {
- for (TablePermission p : userPerms) {
- if (p.matchesFamily(table, family, action)) {
- return true;
+ PermissionCache<TablePermission> tablePerms = tableCache.get(table);
+ if (tablePerms != null) {
+ List<TablePermission> userPerms = tablePerms.getUser(user.getShortName());
+ if (userPerms != null) {
+ for (TablePermission p : userPerms) {
+ if (p.matchesFamily(table, family, action)) {
+ return true;
+ }
}
}
- }
- String[] groups = user.getGroupNames();
- if (groups != null) {
- for (String group : groups) {
- List groupPerms = getGroupPermissions(group, table);
- if (groupPerms != null) {
- for (TablePermission p : groupPerms) {
- if (p.matchesFamily(table, family, action)) {
- return true;
+ String[] groups = user.getGroupNames();
+ if (groups != null) {
+ for (String group : groups) {
+ List<TablePermission> groupPerms = tablePerms.getGroup(group);
+ if (groupPerms != null) {
+ for (TablePermission p : groupPerms) {
+ if (p.matchesFamily(table, family, action)) {
+ return true;
+ }
}
}
}
@@ -382,24 +443,26 @@ public boolean matchPermission(User user,
public boolean matchPermission(User user,
byte[] table, byte[] family, byte[] qualifier,
TablePermission.Action action) {
- List userPerms = getUserPermissions(
- user.getShortName(), table);
- if (userPerms != null) {
- for (TablePermission p : userPerms) {
- if (p.matchesFamilyQualifier(table, family, qualifier, action)) {
- return true;
+ PermissionCache<TablePermission> tablePerms = tableCache.get(table);
+ if (tablePerms != null) {
+ List<TablePermission> userPerms = tablePerms.getUser(user.getShortName());
+ if (userPerms != null) {
+ for (TablePermission p : userPerms) {
+ if (p.matchesFamilyQualifier(table, family, qualifier, action)) {
+ return true;
+ }
}
}
- }
- String[] groups = user.getGroupNames();
- if (groups != null) {
- for (String group : groups) {
- List groupPerms = getGroupPermissions(group, table);
- if (groupPerms != null) {
- for (TablePermission p : groupPerms) {
- if (p.matchesFamilyQualifier(table, family, qualifier, action)) {
- return true;
+ String[] groups = user.getGroupNames();
+ if (groups != null) {
+ for (String group : groups) {
+ List<TablePermission> groupPerms = tablePerms.getGroup(group);
+ if (groupPerms != null) {
+ for (TablePermission p : groupPerms) {
+ if (p.matchesFamilyQualifier(table, family, qualifier, action)) {
+ return true;
+ }
}
}
}
@@ -410,8 +473,7 @@ public boolean matchPermission(User user,
}
public void remove(byte[] table) {
- TABLE_USER_CACHE.remove(table);
- TABLE_GROUP_CACHE.remove(table);
+ tableCache.remove(table);
}
/**
@@ -423,13 +485,9 @@ public void remove(byte[] table) {
*/
public void setUserPermissions(String username, byte[] table,
List perms) {
- ListMultimap tablePerms = TABLE_USER_CACHE.get(table);
- if (tablePerms == null) {
- tablePerms = ArrayListMultimap.create();
- TABLE_USER_CACHE.put(table, tablePerms);
- }
- tablePerms.replaceValues(username, perms);
- writeToZooKeeper(table, tablePerms, TABLE_GROUP_CACHE.get(table));
+ PermissionCache<TablePermission> tablePerms = getTablePermissions(table);
+ tablePerms.replaceUser(username, perms);
+ writeToZooKeeper(table, tablePerms);
}
/**
@@ -441,30 +499,18 @@ public void setUserPermissions(String username, byte[] table,
*/
public void setGroupPermissions(String group, byte[] table,
List perms) {
- ListMultimap tablePerms = TABLE_GROUP_CACHE.get(table);
- if (tablePerms == null) {
- tablePerms = ArrayListMultimap.create();
- TABLE_GROUP_CACHE.put(table, tablePerms);
- }
- tablePerms.replaceValues(group, perms);
- writeToZooKeeper(table, TABLE_USER_CACHE.get(table), tablePerms);
+ PermissionCache<TablePermission> tablePerms = getTablePermissions(table);
+ tablePerms.replaceGroup(group, perms);
+ writeToZooKeeper(table, tablePerms);
}
public void writeToZooKeeper(byte[] table,
- ListMultimap userPerms,
- ListMultimap groupPerms) {
- ListMultimap tmp = ArrayListMultimap.create();
- if (userPerms != null) {
- tmp.putAll(userPerms);
- }
- if (groupPerms != null) {
- for (String group : groupPerms.keySet()) {
- tmp.putAll(AccessControlLists.GROUP_PREFIX + group,
- groupPerms.get(group));
- }
+ PermissionCache<TablePermission> tablePerms) {
+ byte[] serialized = new byte[0];
+ if (tablePerms != null) {
+ serialized = AccessControlLists.writePermissionsAsBytes(tablePerms.getAllPermissions(), conf);
}
- byte[] serialized = AccessControlLists.writePermissionsAsBytes(tmp, conf);
- zkperms.writeToZookeeper(Bytes.toString(table), serialized);
+ zkperms.writeToZookeeper(table, serialized);
}
static Map managerMap =
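As a rough illustration of the initGlobal() behaviour above — a sketch only, assuming the hbase.superuser configuration key and the "@" group-prefix convention used by AccessControlLists.isGroupPrincipal() — each configured superuser lands in either the user or the group side of the global cache with full permissions:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class SuperuserConfigSketch {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // "admin" would be cached as a user entry, "@ops" (a group principal) as a group entry.
        conf.set("hbase.superuser", "admin,@ops");
        for (String name : conf.getStrings("hbase.superuser", new String[0])) {
          if (name.startsWith("@")) {
            System.out.println("group superuser: " + name.substring(1));
          } else {
            System.out.println("user superuser: " + name);
          }
        }
      }
    }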
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/access/UserPermission.java b/security/src/main/java/org/apache/hadoop/hbase/security/access/UserPermission.java
index 8a5c467ab4ad..fd5b755f7d79 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/security/access/UserPermission.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/access/UserPermission.java
@@ -40,6 +40,27 @@ public UserPermission() {
super();
}
+ /**
+ * Creates a new instance for the given user.
+ * @param user the user
+ * @param assigned the list of allowed actions
+ */
+ public UserPermission(byte[] user, Action... assigned) {
+ super(null, null, null, assigned);
+ this.user = user;
+ }
+
+ /**
+ * Creates a new instance for the given user,
+ * matching the actions with the given codes.
+ * @param user the user
+ * @param actionCodes the list of allowed action codes
+ */
+ public UserPermission(byte[] user, byte[] actionCodes) {
+ super(null, null, null, actionCodes);
+ this.user = user;
+ }
+
/**
* Creates a new instance for the given user, table and column family.
* @param user the user
@@ -92,6 +113,14 @@ public byte[] getUser() {
return user;
}
+ /**
+ * Returns true if this permission describes a global user permission.
+ */
+ public boolean isGlobal() {
+ byte[] tableName = getTable();
+ return(tableName == null || tableName.length == 0);
+ }
+
@Override
public boolean equals(Object obj) {
if (!(obj instanceof UserPermission)) {
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/access/ZKPermissionWatcher.java b/security/src/main/java/org/apache/hadoop/hbase/security/access/ZKPermissionWatcher.java
index f7e8654abe04..e9cf8975968f 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/security/access/ZKPermissionWatcher.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/access/ZKPermissionWatcher.java
@@ -146,19 +146,35 @@ private void refreshNodes(List nodes) {
* @param tableName
* @param permsData
*/
- public void writeToZookeeper(String tableName,
- byte[] permsData) {
- String zkNode =
- ZKUtil.joinZNode(ZKUtil.joinZNode(watcher.baseZNode, ACL_NODE),
- tableName);
+ public void writeToZookeeper(byte[] tableName, byte[] permsData) {
+ String zkNode = ZKUtil.joinZNode(watcher.baseZNode, ACL_NODE);
+ zkNode = ZKUtil.joinZNode(zkNode, Bytes.toString(tableName));
+
try {
ZKUtil.createWithParents(watcher, zkNode);
- ZKUtil.updateExistingNodeData(watcher, zkNode,
- permsData, -1);
+ ZKUtil.updateExistingNodeData(watcher, zkNode, permsData, -1);
} catch (KeeperException e) {
- LOG.error("Failed updating permissions for table '" + tableName +
- "'", e);
+ LOG.error("Failed updating permissions for table '" +
+ Bytes.toString(tableName) + "'", e);
watcher.abort("Failed writing node "+zkNode+" to zookeeper", e);
}
}
+
+ /**
+ * Deletes the ACL notify node of the given table.
+ * @param tableName
+ */
+ public void deleteTableACLNode(final byte[] tableName) {
+ String zkNode = ZKUtil.joinZNode(watcher.baseZNode, ACL_NODE);
+ zkNode = ZKUtil.joinZNode(zkNode, Bytes.toString(tableName));
+
+ try {
+ ZKUtil.deleteNode(watcher, zkNode);
+ } catch (KeeperException.NoNodeException e) {
+ LOG.warn("No acl notify node of table '" + tableName + "'");
+ } catch (KeeperException e) {
+ LOG.error("Failed deleting acl node of table '" + tableName + "'", e);
+ watcher.abort("Failed deleting node " + zkNode, e);
+ }
+ }
}
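For orientation, a small sketch (assuming the default /hbase base znode and an "acl" child node) of the znode path that writeToZookeeper() and deleteTableACLNode() operate on; a table named "mytable" would map to /hbase/acl/mytable:

    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.zookeeper.ZKUtil;

    public class AclZNodePathSketch {
      public static void main(String[] args) {
        String baseZNode = "/hbase";                      // assumed default zookeeper.znode.parent
        byte[] tableName = Bytes.toBytes("mytable");
        String aclZNode = ZKUtil.joinZNode(baseZNode, "acl");
        String tableZNode = ZKUtil.joinZNode(aclZNode, Bytes.toString(tableName));
        System.out.println(tableZNode);                   // -> /hbase/acl/mytable
      }
    }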
diff --git a/security/src/main/java/org/apache/hadoop/hbase/security/token/TokenProvider.java b/security/src/main/java/org/apache/hadoop/hbase/security/token/TokenProvider.java
index 0e7e87267b53..0a3a3a67ab18 100644
--- a/security/src/main/java/org/apache/hadoop/hbase/security/token/TokenProvider.java
+++ b/security/src/main/java/org/apache/hadoop/hbase/security/token/TokenProvider.java
@@ -31,6 +31,7 @@
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.security.token.Token;
@@ -80,8 +81,7 @@ public Token getAuthenticationToken()
}
if (currentUser == null) {
throw new AccessDeniedException("No authenticated user for request!");
- } else if (ugi.getAuthenticationMethod() !=
- UserGroupInformation.AuthenticationMethod.KERBEROS) {
+ } else if (!isAllowedDelegationTokenOp(ugi)) {
LOG.warn("Token generation denied for user="+currentUser.getName()
+", authMethod="+ugi.getAuthenticationMethod());
throw new AccessDeniedException(
@@ -91,6 +91,23 @@ public Token getAuthenticationToken()
return secretManager.generateToken(currentUser.getName());
}
+ /**
+ * @param ugi
+ * @return true if delegation token operation is allowed
+ */
+ private boolean isAllowedDelegationTokenOp(UserGroupInformation ugi) throws IOException {
+ AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
+ if (authMethod == AuthenticationMethod.PROXY) {
+ authMethod = ugi.getRealUser().getAuthenticationMethod();
+ }
+ if (authMethod != AuthenticationMethod.KERBEROS
+ && authMethod != AuthenticationMethod.KERBEROS_SSL
+ && authMethod != AuthenticationMethod.CERTIFICATE) {
+ return false;
+ }
+ return true;
+ }
+
@Override
public String whoami() {
return RequestContext.getRequestUserName();
diff --git a/security/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java b/security/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
new file mode 100644
index 000000000000..10aea8089b0b
--- /dev/null
+++ b/security/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
@@ -0,0 +1,55 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.LargeTests;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Reruns TestLoadIncrementalHFiles using LoadIncrementalHFiles in secure mode.
+ * This suite is unable to verify the security handoff/turnover,
+ * as the miniCluster runs as the system user and thus has root
+ * privileges, and delegation tokens don't seem to work on miniDFS.
+ *
+ * Thus SecureBulkLoad can only be completely verified by running
+ * integration tests against a secure cluster. This suite is still
+ * invaluable as it verifies the other mechanisms that need to be
+ * supported as part of a LoadIncrementalHFiles call.
+ */
+@Category(LargeTests.class)
+public class TestSecureLoadIncrementalHFiles extends TestLoadIncrementalHFiles {
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ useSecureHBaseOverride = true;
+ // setup configuration
+ SecureTestUtil.enableSecurity(util.getConfiguration());
+
+ util.startMiniCluster();
+
+ // Wait for the ACL table to become available
+ util.waitTableAvailable(AccessControlLists.ACL_TABLE_NAME, 30000);
+ }
+
+}
+
diff --git a/security/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java b/security/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
new file mode 100644
index 000000000000..38ac80899111
--- /dev/null
+++ b/security/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.LargeTests;
+import org.apache.hadoop.hbase.client.UserProvider;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+
+/**
+ * Reruns TestLoadIncrementalHFilesSplitRecovery
+ * using LoadIncrementalHFiles in secure mode.
+ * This suite is unable to verify the security handoff/turnover,
+ * as the miniCluster runs as the system user and thus has root
+ * privileges, and delegation tokens don't seem to work on miniDFS.
+ *
+ * Thus SecureBulkLoad can only be completely verified by running
+ * integration tests against a secure cluster. This suite is still
+ * invaluable as it verifies the other mechanisms that need to be
+ * supported as part of a LoadIncrementalHFiles call.
+ */
+@Category(LargeTests.class)
+public class TestSecureLoadIncrementalHFilesSplitRecovery extends TestLoadIncrementalHFilesSplitRecovery {
+
+ //This "overrides" the parent static method
+ //make sure they are in sync
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ useSecureHBaseOverride = true;
+ util = new HBaseTestingUtility();
+ // setup configuration
+ SecureTestUtil.enableSecurity(util.getConfiguration());
+ util.startMiniCluster();
+
+ // Wait for the ACL table to become available
+ util.waitTableAvailable(AccessControlLists.ACL_TABLE_NAME, 30000);
+ }
+
+ //Disabling this test as it does not work in secure mode
+ @Test
+ @Override
+ public void testBulkLoadPhaseFailure() {
+ }
+}
+
diff --git a/security/src/test/java/org/apache/hadoop/hbase/security/access/SecureTestUtil.java b/security/src/test/java/org/apache/hadoop/hbase/security/access/SecureTestUtil.java
index 1a087b6a9268..5d55760a5a7c 100644
--- a/security/src/test/java/org/apache/hadoop/hbase/security/access/SecureTestUtil.java
+++ b/security/src/test/java/org/apache/hadoop/hbase/security/access/SecureTestUtil.java
@@ -21,6 +21,7 @@
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.ipc.SecureRpcEngine;
import org.apache.hadoop.hbase.security.User;
@@ -32,8 +33,10 @@ public static void enableSecurity(Configuration conf) throws IOException {
conf.set("hadoop.security.authorization", "false");
conf.set("hadoop.security.authentication", "simple");
conf.set("hbase.rpc.engine", SecureRpcEngine.class.getName());
- conf.set("hbase.coprocessor.master.classes", AccessController.class.getName());
- conf.set("hbase.coprocessor.region.classes", AccessController.class.getName());
+ conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, AccessController.class.getName());
+ conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, AccessController.class.getName()+
+ ","+SecureBulkLoadEndpoint.class.getName());
+ conf.set(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY, AccessController.class.getName());
// add the process running user to superusers
String currentUser = User.getCurrent().getName();
conf.set("hbase.superuser", "admin,"+currentUser);
diff --git a/security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessControlFilter.java b/security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessControlFilter.java
index 0a2cad27c73b..7f7ef55ec5ee 100644
--- a/security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessControlFilter.java
+++ b/security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessControlFilter.java
@@ -70,7 +70,7 @@ public static void setupBeforeClass() throws Exception {
conf.set("hbase.superuser", conf.get("hbase.superuser", "") +
String.format(",%s.hfs.0,%s.hfs.1,%s.hfs.2", baseuser, baseuser, baseuser));
TEST_UTIL.startMiniCluster();
- TEST_UTIL.waitTableAvailable(AccessControlLists.ACL_TABLE_NAME, 5000);
+ TEST_UTIL.waitTableAvailable(AccessControlLists.ACL_TABLE_NAME, 30000);
ADMIN = User.createUserForTesting(conf, "admin", new String[]{"supergroup"});
READER = User.createUserForTesting(conf, "reader", new String[0]);
@@ -95,10 +95,12 @@ public Object run() throws Exception {
AccessControlLists.ACL_TABLE_NAME);
AccessControllerProtocol acls = aclmeta.coprocessorProxy(
AccessControllerProtocol.class, Bytes.toBytes("testtable"));
- TablePermission perm = new TablePermission(TABLE, null, Permission.Action.READ);
- acls.grant(Bytes.toBytes(READER.getShortName()), perm);
- perm = new TablePermission(TABLE, FAMILY, PUBLIC_COL, Permission.Action.READ);
- acls.grant(Bytes.toBytes(LIMITED.getShortName()), perm);
+ UserPermission perm = new UserPermission(Bytes.toBytes(READER.getShortName()),
+ TABLE, null, Permission.Action.READ);
+ acls.grant(perm);
+ perm = new UserPermission(Bytes.toBytes(LIMITED.getShortName()),
+ TABLE, FAMILY, PUBLIC_COL, Permission.Action.READ);
+ acls.grant(perm);
return null;
}
});
diff --git a/security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java b/security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java
index fe04c5a69ba6..688e30d9d835 100644
--- a/security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java
+++ b/security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hbase.security.access;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -26,18 +27,27 @@
import java.security.PrivilegedExceptionAction;
import java.util.List;
import java.util.Map;
+import java.util.NavigableMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.UnknownRowLockException;
+import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
@@ -51,10 +61,26 @@
import org.apache.hadoop.hbase.coprocessor.CoprocessorException;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessorEnvironment;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost;
+import org.apache.hadoop.hbase.regionserver.RegionServerCoprocessorHost;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.AccessControllerProtocol;
+import org.apache.hadoop.hbase.security.access.Permission;
+import org.apache.hadoop.hbase.security.access.UserPermission;
+import org.apache.hadoop.hbase.security.access.Permission.Action;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.ipc.RemoteException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -65,66 +91,109 @@
* levels of authorized users.
*/
@Category(LargeTests.class)
+@SuppressWarnings("rawtypes")
public class TestAccessController {
- private static Log LOG = LogFactory.getLog(TestAccessController.class);
+ private static final Log LOG = LogFactory.getLog(TestAccessController.class);
private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static Configuration conf;
// user with all permissions
private static User SUPERUSER;
- // table owner user
- private static User USER_OWNER;
+ // user granted all global permissions
+ private static User USER_ADMIN;
// user with rw permissions
private static User USER_RW;
+ // user with rw permissions on the table
+ private static User USER_RW_ON_TABLE;
// user with read-only permissions
private static User USER_RO;
+ // user who owns the table; will have all permissions on it
+ private static User USER_OWNER;
+ // user with CREATE permission on the table only
+ private static User USER_CREATE;
// user with no permissions
private static User USER_NONE;
private static byte[] TEST_TABLE = Bytes.toBytes("testtable");
+ private static byte[] TEST_TABLE2 = Bytes.toBytes("testtable2");
private static byte[] TEST_FAMILY = Bytes.toBytes("f1");
private static MasterCoprocessorEnvironment CP_ENV;
+ private static RegionCoprocessorEnvironment RCP_ENV;
+ private static RegionServerCoprocessorEnvironment RSCP_ENV;
private static AccessController ACCESS_CONTROLLER;
@BeforeClass
public static void setupBeforeClass() throws Exception {
// setup configuration
conf = TEST_UTIL.getConfiguration();
+ conf.set("hbase.master.hfilecleaner.plugins",
+ "org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner," +
+ "org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner");
+ conf.set("hbase.master.logcleaner.plugins",
+ "org.apache.hadoop.hbase.master.snapshot.SnapshotLogCleaner");
SecureTestUtil.enableSecurity(conf);
TEST_UTIL.startMiniCluster();
- MasterCoprocessorHost cpHost = TEST_UTIL.getMiniHBaseCluster()
- .getMaster().getCoprocessorHost();
+ MasterCoprocessorHost cpHost = TEST_UTIL.getMiniHBaseCluster().getMaster().getCoprocessorHost();
cpHost.load(AccessController.class, Coprocessor.PRIORITY_HIGHEST, conf);
- ACCESS_CONTROLLER = (AccessController)cpHost.findCoprocessor(
- AccessController.class.getName());
+ ACCESS_CONTROLLER = (AccessController) cpHost.findCoprocessor(AccessController.class.getName());
CP_ENV = cpHost.createEnvironment(AccessController.class, ACCESS_CONTROLLER,
- Coprocessor.PRIORITY_HIGHEST, 1, conf);
+ Coprocessor.PRIORITY_HIGHEST, 1, conf);
+ RegionServerCoprocessorHost rsHost = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0)
+ .getCoprocessorHost();
+ RSCP_ENV = rsHost.createEnvironment(AccessController.class, ACCESS_CONTROLLER,
+ Coprocessor.PRIORITY_HIGHEST, 1, conf);
+
+ // Wait for the ACL table to become available
+ TEST_UTIL.waitTableAvailable(AccessControlLists.ACL_TABLE_NAME, 30000);
// create a set of test users
- SUPERUSER = User.createUserForTesting(conf, "admin", new String[]{"supergroup"});
- USER_OWNER = User.createUserForTesting(conf, "owner", new String[0]);
+ SUPERUSER = User.createUserForTesting(conf, "admin", new String[] { "supergroup" });
+ USER_ADMIN = User.createUserForTesting(conf, "admin2", new String[0]);
USER_RW = User.createUserForTesting(conf, "rwuser", new String[0]);
USER_RO = User.createUserForTesting(conf, "rouser", new String[0]);
+ USER_RW_ON_TABLE = User.createUserForTesting(conf, "rwuser_1", new String[0]);
+ USER_OWNER = User.createUserForTesting(conf, "owner", new String[0]);
+ USER_CREATE = User.createUserForTesting(conf, "tbl_create", new String[0]);
USER_NONE = User.createUserForTesting(conf, "nouser", new String[0]);
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
HTableDescriptor htd = new HTableDescriptor(TEST_TABLE);
htd.addFamily(new HColumnDescriptor(TEST_FAMILY));
- htd.setOwnerString(USER_OWNER.getShortName());
+ htd.setOwner(USER_OWNER);
admin.createTable(htd);
+ TEST_UTIL.waitTableEnabled(TEST_TABLE, 5000);
+
+ HRegion region = TEST_UTIL.getHBaseCluster().getRegions(TEST_TABLE).get(0);
+ RegionCoprocessorHost rcpHost = region.getCoprocessorHost();
+ RCP_ENV = rcpHost.createEnvironment(AccessController.class, ACCESS_CONTROLLER,
+ Coprocessor.PRIORITY_HIGHEST, 1, conf);
// initilize access control
- HTable meta = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
- AccessControllerProtocol protocol =
- meta.coprocessorProxy(AccessControllerProtocol.class, TEST_TABLE);
- protocol.grant(Bytes.toBytes(USER_RW.getShortName()),
- new TablePermission(TEST_TABLE, TEST_FAMILY, Permission.Action.READ,
- Permission.Action.WRITE));
-
- protocol.grant(Bytes.toBytes(USER_RO.getShortName()),
- new TablePermission(TEST_TABLE, TEST_FAMILY, Permission.Action.READ));
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ TEST_TABLE);
+
+ protocol.grant(new UserPermission(Bytes.toBytes(USER_ADMIN.getShortName()),
+ Permission.Action.ADMIN, Permission.Action.CREATE, Permission.Action.READ,
+ Permission.Action.WRITE));
+
+ protocol.grant(new UserPermission(Bytes.toBytes(USER_RW.getShortName()), TEST_TABLE,
+ TEST_FAMILY, Permission.Action.READ, Permission.Action.WRITE));
+
+ protocol.grant(new UserPermission(Bytes.toBytes(USER_RO.getShortName()), TEST_TABLE,
+ TEST_FAMILY, Permission.Action.READ));
+
+ protocol.grant(new UserPermission(Bytes.toBytes(USER_CREATE.getShortName()), TEST_TABLE, null,
+ Permission.Action.CREATE));
+
+ protocol.grant(new UserPermission(Bytes.toBytes(USER_RW_ON_TABLE.getShortName()), TEST_TABLE,
+ null, Permission.Action.READ, Permission.Action.WRITE));
+ } finally {
+ acl.close();
+ }
}
@AfterClass
@@ -132,54 +201,70 @@ public static void tearDownAfterClass() throws Exception {
TEST_UTIL.shutdownMiniCluster();
}
- public void verifyAllowed(User user, PrivilegedExceptionAction action)
- throws Exception {
- try {
- user.runAs(action);
- } catch (AccessDeniedException ade) {
- fail("Expected action to pass for user '" + user.getShortName() +
- "' but was denied");
+ public void verifyAllowed(User user, PrivilegedExceptionAction... actions) throws Exception {
+ for (PrivilegedExceptionAction action : actions) {
+ try {
+ user.runAs(action);
+ } catch (AccessDeniedException ade) {
+ fail("Expected action to pass for user '" + user.getShortName() + "' but was denied");
+ } catch (UnknownRowLockException exp){
+ //expected
+ }
}
}
- public void verifyAllowed(PrivilegedExceptionAction action, User... users)
- throws Exception {
+ public void verifyAllowed(PrivilegedExceptionAction action, User... users) throws Exception {
for (User user : users) {
verifyAllowed(user, action);
}
}
- public void verifyDenied(User user, PrivilegedExceptionAction action)
- throws Exception {
- try {
- user.runAs(action);
- fail("Expected AccessDeniedException for user '" + user.getShortName() + "'");
- } catch (RetriesExhaustedWithDetailsException e) {
- // in case of batch operations, and put, the client assembles a
- // RetriesExhaustedWithDetailsException instead of throwing an
- // AccessDeniedException
- boolean isAccessDeniedException = false;
- for ( Throwable ex : e.getCauses()) {
- if (ex instanceof AccessDeniedException) {
- isAccessDeniedException = true;
- break;
+ public void verifyDenied(User user, PrivilegedExceptionAction... actions) throws Exception {
+ for (PrivilegedExceptionAction action : actions) {
+ try {
+ user.runAs(action);
+ fail("Expected AccessDeniedException for user '" + user.getShortName() + "'");
+ } catch (AccessDeniedException ade) {
+ // expected result
+ } catch (IOException e) {
+ boolean isAccessDeniedException = false;
+ if(e instanceof RetriesExhaustedWithDetailsException) {
+ // in case of batch operations, and put, the client assembles a
+ // RetriesExhaustedWithDetailsException instead of throwing an
+ // AccessDeniedException
+ for(Throwable ex : ((RetriesExhaustedWithDetailsException) e).getCauses()) {
+ if (ex instanceof AccessDeniedException) {
+ isAccessDeniedException = true;
+ break;
+ }
+ }
+ }
+ else {
+ if (e instanceof RemoteException) {
+ e = ((RemoteException)e).unwrapRemoteException();
+ }
+ // For doBulkLoad calls, the AccessDeniedException
+ // is buried in the exception's cause chain
+ Throwable ex = e;
+ do {
+ if (ex instanceof AccessDeniedException) {
+ isAccessDeniedException = true;
+ break;
+ }
+ } while((ex = ex.getCause()) != null);
+ }
+ if (!isAccessDeniedException) {
+ fail("Not receiving AccessDeniedException for user '" + user.getShortName() + "'");
}
}
- if (!isAccessDeniedException ) {
- fail("Not receiving AccessDeniedException for user '" +
- user.getShortName() + "'");
- }
- } catch (AccessDeniedException ade) {
- // expected result
}
}
- public void verifyDenied(PrivilegedExceptionAction action, User... users)
- throws Exception {
- for (User user : users) {
- verifyDenied(user, action);
- }
+ public void verifyDenied(PrivilegedExceptionAction action, User... users) throws Exception {
+ for (User user : users) {
+ verifyDenied(user, action);
}
+ }
@Test
public void testTableCreate() throws Exception {
@@ -187,61 +272,47 @@ public void testTableCreate() throws Exception {
public Object run() throws Exception {
HTableDescriptor htd = new HTableDescriptor("testnewtable");
htd.addFamily(new HColumnDescriptor(TEST_FAMILY));
- ACCESS_CONTROLLER.preCreateTable(
- ObserverContext.createAndPrepare(CP_ENV, null), htd, null);
+ ACCESS_CONTROLLER.preCreateTable(ObserverContext.createAndPrepare(CP_ENV, null), htd, null);
return null;
}
};
// verify that superuser can create tables
- verifyAllowed(SUPERUSER, createTable);
+ verifyAllowed(createTable, SUPERUSER, USER_ADMIN);
// all others should be denied
- verifyDenied(USER_OWNER, createTable);
- verifyDenied(USER_RW, createTable);
- verifyDenied(USER_RO, createTable);
- verifyDenied(USER_NONE, createTable);
+ verifyDenied(createTable, USER_CREATE, USER_RW, USER_RO, USER_NONE);
}
@Test
public void testTableModify() throws Exception {
- PrivilegedExceptionAction disableTable = new PrivilegedExceptionAction() {
+ PrivilegedExceptionAction modifyTable = new PrivilegedExceptionAction() {
public Object run() throws Exception {
HTableDescriptor htd = new HTableDescriptor(TEST_TABLE);
htd.addFamily(new HColumnDescriptor(TEST_FAMILY));
- htd.addFamily(new HColumnDescriptor("fam_"+User.getCurrent().getShortName()));
- ACCESS_CONTROLLER.preModifyTable(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE, htd);
+ htd.addFamily(new HColumnDescriptor("fam_" + User.getCurrent().getShortName()));
+ ACCESS_CONTROLLER.preModifyTable(ObserverContext.createAndPrepare(CP_ENV, null),
+ TEST_TABLE, htd);
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, disableTable);
- verifyDenied(USER_RW, disableTable);
- verifyDenied(USER_RO, disableTable);
- verifyDenied(USER_NONE, disableTable);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, disableTable);
+ verifyAllowed(modifyTable, SUPERUSER, USER_ADMIN, USER_CREATE, USER_OWNER);
+ verifyDenied(modifyTable, USER_RW, USER_RO, USER_NONE);
}
@Test
public void testTableDelete() throws Exception {
- PrivilegedExceptionAction disableTable = new PrivilegedExceptionAction() {
+ PrivilegedExceptionAction deleteTable = new PrivilegedExceptionAction() {
public Object run() throws Exception {
- ACCESS_CONTROLLER.preDeleteTable(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE);
+ ACCESS_CONTROLLER
+ .preDeleteTable(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE);
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, disableTable);
- verifyDenied(USER_RW, disableTable);
- verifyDenied(USER_RO, disableTable);
- verifyDenied(USER_NONE, disableTable);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, disableTable);
+ verifyAllowed(deleteTable, SUPERUSER, USER_ADMIN, USER_CREATE, USER_OWNER);
+ verifyDenied(deleteTable, USER_RW, USER_RO, USER_NONE);
}
@Test
@@ -249,19 +320,14 @@ public void testAddColumn() throws Exception {
final HColumnDescriptor hcd = new HColumnDescriptor("fam_new");
PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
public Object run() throws Exception {
- ACCESS_CONTROLLER.preAddColumn(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE, hcd);
+ ACCESS_CONTROLLER.preAddColumn(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE,
+ hcd);
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_CREATE, USER_OWNER);
+ verifyDenied(action, USER_RW, USER_RO, USER_NONE);
}
@Test
@@ -270,151 +336,136 @@ public void testModifyColumn() throws Exception {
hcd.setMaxVersions(10);
PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
public Object run() throws Exception {
- ACCESS_CONTROLLER.preModifyColumn(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE, hcd);
+ ACCESS_CONTROLLER.preModifyColumn(ObserverContext.createAndPrepare(CP_ENV, null),
+ TEST_TABLE, hcd);
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_CREATE, USER_OWNER);
+ verifyDenied(action, USER_RW, USER_RO, USER_NONE);
}
@Test
public void testDeleteColumn() throws Exception {
PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
public Object run() throws Exception {
- ACCESS_CONTROLLER.preDeleteColumn(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE, TEST_FAMILY);
+ ACCESS_CONTROLLER.preDeleteColumn(ObserverContext.createAndPrepare(CP_ENV, null),
+ TEST_TABLE, TEST_FAMILY);
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_CREATE, USER_OWNER);
+ verifyDenied(action, USER_RW, USER_RO, USER_NONE);
}
@Test
public void testTableDisable() throws Exception {
PrivilegedExceptionAction disableTable = new PrivilegedExceptionAction() {
public Object run() throws Exception {
- ACCESS_CONTROLLER.preDisableTable(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE);
+ ACCESS_CONTROLLER.preDisableTable(ObserverContext.createAndPrepare(CP_ENV, null),
+ TEST_TABLE);
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, disableTable);
- verifyDenied(USER_RW, disableTable);
- verifyDenied(USER_RO, disableTable);
- verifyDenied(USER_NONE, disableTable);
+ PrivilegedExceptionAction disableAclTable = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preDisableTable(ObserverContext.createAndPrepare(CP_ENV, null),
+ AccessControlLists.ACL_TABLE_NAME);
+ return null;
+ }
+ };
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, disableTable);
+ verifyAllowed(disableTable, SUPERUSER, USER_ADMIN, USER_CREATE, USER_OWNER);
+ verifyDenied(disableTable, USER_RW, USER_RO, USER_NONE);
+
+ // No user should be allowed to disable _acl_ table
+ verifyDenied(disableAclTable, SUPERUSER, USER_ADMIN, USER_CREATE, USER_OWNER, USER_RW, USER_RO);
}
@Test
public void testTableEnable() throws Exception {
PrivilegedExceptionAction enableTable = new PrivilegedExceptionAction() {
public Object run() throws Exception {
- ACCESS_CONTROLLER.preEnableTable(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE);
+ ACCESS_CONTROLLER
+ .preEnableTable(ObserverContext.createAndPrepare(CP_ENV, null), TEST_TABLE);
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, enableTable);
- verifyDenied(USER_RW, enableTable);
- verifyDenied(USER_RO, enableTable);
- verifyDenied(USER_NONE, enableTable);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, enableTable);
+ verifyAllowed(enableTable, SUPERUSER, USER_ADMIN, USER_CREATE, USER_OWNER);
+ verifyDenied(enableTable, USER_RW, USER_RO, USER_NONE);
}
@Test
public void testMove() throws Exception {
+ Map regions;
HTable table = new HTable(TEST_UTIL.getConfiguration(), TEST_TABLE);
- Map regions = table.getRegionsInfo();
- final Map.Entry firstRegion =
- regions.entrySet().iterator().next();
+ try {
+ regions = table.getRegionsInfo();
+ } finally {
+ table.close();
+ }
+ final Map.Entry firstRegion = regions.entrySet().iterator().next();
final ServerName server = TEST_UTIL.getHBaseCluster().getRegionServer(0).getServerName();
PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
public Object run() throws Exception {
ACCESS_CONTROLLER.preMove(ObserverContext.createAndPrepare(CP_ENV, null),
- firstRegion.getKey(), server, server);
+ firstRegion.getKey(), server, server);
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER);
+ verifyDenied(action, USER_CREATE, USER_RW, USER_RO, USER_NONE);
}
@Test
public void testAssign() throws Exception {
+ Map regions;
HTable table = new HTable(TEST_UTIL.getConfiguration(), TEST_TABLE);
- Map regions = table.getRegionsInfo();
- final Map.Entry firstRegion =
- regions.entrySet().iterator().next();
+ try {
+ regions = table.getRegionsInfo();
+ } finally {
+ table.close();
+ }
+ final Map.Entry firstRegion = regions.entrySet().iterator().next();
PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
public Object run() throws Exception {
ACCESS_CONTROLLER.preAssign(ObserverContext.createAndPrepare(CP_ENV, null),
- firstRegion.getKey());
+ firstRegion.getKey());
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER);
+ verifyDenied(action, USER_CREATE, USER_RW, USER_RO, USER_NONE);
}
@Test
public void testUnassign() throws Exception {
+ Map regions;
HTable table = new HTable(TEST_UTIL.getConfiguration(), TEST_TABLE);
- Map regions = table.getRegionsInfo();
- final Map.Entry firstRegion =
- regions.entrySet().iterator().next();
+ try {
+ regions = table.getRegionsInfo();
+ } finally {
+ table.close();
+ }
+ final Map.Entry firstRegion = regions.entrySet().iterator().next();
PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
public Object run() throws Exception {
ACCESS_CONTROLLER.preUnassign(ObserverContext.createAndPrepare(CP_ENV, null),
- firstRegion.getKey(), false);
+ firstRegion.getKey(), false);
return null;
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER);
+ verifyDenied(action, USER_CREATE, USER_RW, USER_RO, USER_NONE);
}
@Test
@@ -426,14 +477,8 @@ public Object run() throws Exception {
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN);
+ verifyDenied(action, USER_CREATE, USER_OWNER, USER_RW, USER_RO, USER_NONE);
}
@Test
@@ -445,14 +490,8 @@ public Object run() throws Exception {
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN);
+ verifyDenied(action, USER_CREATE, USER_OWNER, USER_RW, USER_RO, USER_NONE);
}
@Test
@@ -464,14 +503,8 @@ public Object run() throws Exception {
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN);
+ verifyDenied(action, USER_CREATE, USER_OWNER, USER_RW, USER_RO, USER_NONE);
}
@Test
@@ -483,36 +516,75 @@ public Object run() throws Exception {
}
};
- // all others should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_RO, action);
- verifyDenied(USER_NONE, action);
-
- // verify that superuser can create tables
- verifyAllowed(SUPERUSER, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN);
+ verifyDenied(action, USER_CREATE, USER_OWNER, USER_RW, USER_RO, USER_NONE);
}
private void verifyWrite(PrivilegedExceptionAction action) throws Exception {
- // should be denied
- verifyDenied(USER_NONE, action);
- verifyDenied(USER_RO, action);
-
- // should be allowed
- verifyAllowed(SUPERUSER, action);
- verifyAllowed(USER_OWNER, action);
- verifyAllowed(USER_RW, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER, USER_RW);
+ verifyDenied(action, USER_NONE, USER_CREATE, USER_RO);
+ }
+
+ @Test
+ public void testSplit() throws Exception {
+ PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preSplit(ObserverContext.createAndPrepare(RCP_ENV, null));
+ return null;
+ }
+ };
+
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER);
+ verifyDenied(action, USER_CREATE, USER_RW, USER_RO, USER_NONE);
+ }
+
+ @Test
+ public void testFlush() throws Exception {
+ PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preFlush(ObserverContext.createAndPrepare(RCP_ENV, null));
+ return null;
+ }
+ };
+
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER, USER_CREATE);
+ verifyDenied(action, USER_RW, USER_RO, USER_NONE);
+ }
+
+ @Test
+ public void testCompact() throws Exception {
+ PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preCompact(ObserverContext.createAndPrepare(RCP_ENV, null), null, null);
+ return null;
+ }
+ };
+
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER, USER_CREATE);
+ verifyDenied(action, USER_RW, USER_RO, USER_NONE);
+ }
+
+ @Test
+ public void testPreCompactSelection() throws Exception {
+ PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preCompactSelection(ObserverContext.createAndPrepare(RCP_ENV, null), null, null);
+ return null;
+ }
+ };
+
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER);
+ verifyDenied(action, USER_CREATE, USER_RW, USER_RO, USER_NONE);
}
private void verifyRead(PrivilegedExceptionAction action) throws Exception {
- // should be denied
- verifyDenied(USER_NONE, action);
-
- // should be allowed
- verifyAllowed(SUPERUSER, action);
- verifyAllowed(USER_OWNER, action);
- verifyAllowed(USER_RW, action);
- verifyAllowed(USER_RO, action);
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER, USER_RW, USER_RO);
+ verifyDenied(action, USER_NONE, USER_CREATE);
+ }
+
+ private void verifyReadWrite(PrivilegedExceptionAction action) throws Exception {
+ verifyAllowed(action, SUPERUSER, USER_ADMIN, USER_OWNER, USER_RW);
+ verifyDenied(action, USER_NONE, USER_CREATE, USER_RO);
}
@Test
@@ -523,7 +595,11 @@ public Object run() throws Exception {
Get g = new Get(Bytes.toBytes("random_row"));
g.addFamily(TEST_FAMILY);
HTable t = new HTable(conf, TEST_TABLE);
- t.get(g);
+ try {
+ t.get(g);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -536,14 +612,18 @@ public Object run() throws Exception {
s.addFamily(TEST_FAMILY);
HTable table = new HTable(conf, TEST_TABLE);
- ResultScanner scanner = table.getScanner(s);
try {
- for (Result r = scanner.next(); r != null; r = scanner.next()) {
- // do nothing
+ ResultScanner scanner = table.getScanner(s);
+ try {
+ for (Result r = scanner.next(); r != null; r = scanner.next()) {
+ // do nothing
+ }
+ } catch (IOException e) {
+ } finally {
+ scanner.close();
}
- } catch (IOException e) {
} finally {
- scanner.close();
+ table.close();
}
return null;
}
@@ -560,7 +640,11 @@ public Object run() throws Exception {
Put p = new Put(Bytes.toBytes("random_row"));
p.add(TEST_FAMILY, Bytes.toBytes("Qualifier"), Bytes.toBytes(1));
HTable t = new HTable(conf, TEST_TABLE);
- t.put(p);
+ try {
+ t.put(p);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -572,7 +656,11 @@ public Object run() throws Exception {
Delete d = new Delete(Bytes.toBytes("random_row"));
d.deleteFamily(TEST_FAMILY);
HTable t = new HTable(conf, TEST_TABLE);
- t.delete(d);
+ try {
+ t.delete(d);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -584,15 +672,247 @@ public Object run() throws Exception {
Increment inc = new Increment(Bytes.toBytes("random_row"));
inc.addColumn(TEST_FAMILY, Bytes.toBytes("Qualifier"), 1);
HTable t = new HTable(conf, TEST_TABLE);
- t.increment(inc);
+ try {
+ t.increment(inc);
+ } finally {
+ t.close();
+ }
return null;
}
};
verifyWrite(incrementAction);
}
+ @Test
+ public void testReadWrite() throws Exception {
+ // action for checkAndDelete
+ PrivilegedExceptionAction checkAndDeleteAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ Delete d = new Delete(Bytes.toBytes("random_row"));
+ d.deleteFamily(TEST_FAMILY);
+ HTable t = new HTable(conf, TEST_TABLE);
+ try {
+ t.checkAndDelete(Bytes.toBytes("random_row"), TEST_FAMILY, Bytes.toBytes("q"),
+ Bytes.toBytes("test_value"), d);
+ } finally {
+ t.close();
+ }
+ return null;
+ }
+ };
+ verifyReadWrite(checkAndDeleteAction);
+
+ // action for checkAndPut()
+ PrivilegedExceptionAction checkAndPut = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ Put p = new Put(Bytes.toBytes("random_row"));
+ p.add(TEST_FAMILY, Bytes.toBytes("Qualifier"), Bytes.toBytes(1));
+ HTable t = new HTable(conf, TEST_TABLE);
+ try {
+ t.checkAndPut(Bytes.toBytes("random_row"), TEST_FAMILY, Bytes.toBytes("q"),
+ Bytes.toBytes("test_value"), p);
+ } finally {
+ t.close();
+ }
+ return null;
+ }
+ };
+ verifyReadWrite(checkAndPut);
+ }
+
+ @Test
+ public void testBulkLoad() throws Exception {
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+ final Path dir = TEST_UTIL.getDataTestDir("testBulkLoad");
+ fs.mkdirs(dir);
+ //need to make it globally writable
+ //so users creating HFiles have write permissions
+ fs.setPermission(dir, FsPermission.valueOf("-rwxrwxrwx"));
+
+ PrivilegedExceptionAction bulkLoadAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ int numRows = 3;
+
+ // Assuming that the test table won't split within this key range
+ byte[][][] hfileRanges = {{{(byte)0}, {(byte)9}}};
+
+ Path bulkLoadBasePath = new Path(dir, new Path(User.getCurrent().getName()));
+ new BulkLoadHelper(bulkLoadBasePath)
+ .bulkLoadHFile(TEST_TABLE, TEST_FAMILY, Bytes.toBytes("q"), hfileRanges, numRows);
+
+ return null;
+ }
+ };
+ verifyAllowed(bulkLoadAction, SUPERUSER, USER_ADMIN, USER_OWNER, USER_CREATE);
+ verifyDenied(bulkLoadAction, USER_RW, USER_RO, USER_NONE);
+
+ // Reinit after the bulk upload
+ TEST_UTIL.getHBaseAdmin().disableTable(TEST_TABLE);
+ TEST_UTIL.getHBaseAdmin().enableTable(TEST_TABLE);
+ }
+
+ public class BulkLoadHelper {
+ private final FileSystem fs;
+ private final Path loadPath;
+ private final Configuration conf;
+
+ public BulkLoadHelper(Path loadPath) throws IOException {
+ fs = TEST_UTIL.getTestFileSystem();
+ conf = TEST_UTIL.getConfiguration();
+ loadPath = loadPath.makeQualified(fs);
+ this.loadPath = loadPath;
+ }
+
+ private void createHFile(Path path,
+ byte[] family, byte[] qualifier,
+ byte[] startKey, byte[] endKey, int numRows) throws IOException {
+
+ HFile.Writer writer = null;
+ long now = System.currentTimeMillis();
+ try {
+ writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
+ .withPath(fs, path)
+ .withComparator(KeyValue.KEY_COMPARATOR)
+ .create();
+ // subtract 2 since numRows doesn't include boundary keys
+ for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, true, numRows-2)) {
+ KeyValue kv = new KeyValue(key, family, qualifier, now, key);
+ writer.append(kv);
+ }
+ } finally {
+ if(writer != null)
+ writer.close();
+ }
+ }
+
+ private void bulkLoadHFile(
+ byte[] tableName,
+ byte[] family,
+ byte[] qualifier,
+ byte[][][] hfileRanges,
+ int numRowsPerRange) throws Exception {
+
+ Path familyDir = new Path(loadPath, Bytes.toString(family));
+ fs.mkdirs(familyDir);
+ int hfileIdx = 0;
+ for (byte[][] range : hfileRanges) {
+ byte[] from = range[0];
+ byte[] to = range[1];
+ createHFile(new Path(familyDir, "hfile_"+(hfileIdx++)),
+ family, qualifier, from, to, numRowsPerRange);
+ }
+ //set global read so RegionServer can move it
+ setPermission(loadPath, FsPermission.valueOf("-rwxrwxrwx"));
+
+ HTable table = new HTable(conf, tableName);
+ try {
+ TEST_UTIL.waitTableAvailable(tableName, 30000);
+ conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
+ LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
+ loader.doBulkLoad(loadPath, table);
+ } finally {
+ table.close();
+ }
+ }
+
+ public void setPermission(Path dir, FsPermission perm) throws IOException {
+ if (!fs.getFileStatus(dir).isDir()) {
+ fs.setPermission(dir, perm);
+ }
+ else {
+ for (FileStatus el : fs.listStatus(dir)) {
+ fs.setPermission(el.getPath(), perm);
+ setPermission(el.getPath(), perm);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testAppend() throws Exception {
+
+ PrivilegedExceptionAction appendAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ byte[] row = Bytes.toBytes("random_row");
+ byte[] qualifier = Bytes.toBytes("q");
+ Put put = new Put(row);
+ put.add(TEST_FAMILY, qualifier, Bytes.toBytes(1));
+ Append append = new Append(row);
+ append.add(TEST_FAMILY, qualifier, Bytes.toBytes(2));
+ HTable t = new HTable(conf, TEST_TABLE);
+ try {
+ t.put(put);
+ t.append(append);
+ } finally {
+ t.close();
+ }
+ return null;
+ }
+ };
+
+ verifyAllowed(appendAction, SUPERUSER, USER_ADMIN, USER_OWNER, USER_RW);
+ verifyDenied(appendAction, USER_CREATE, USER_RO, USER_NONE);
+ }
+
@Test
public void testGrantRevoke() throws Exception {
+
+ PrivilegedExceptionAction grantAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ TEST_TABLE);
+ protocol.grant(new UserPermission(Bytes.toBytes(USER_RO.getShortName()), TEST_TABLE,
+ TEST_FAMILY, (byte[]) null, Action.READ));
+ } finally {
+ acl.close();
+ }
+ return null;
+ }
+ };
+
+ PrivilegedExceptionAction revokeAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ TEST_TABLE);
+ protocol.revoke(new UserPermission(Bytes.toBytes(USER_RO.getShortName()), TEST_TABLE,
+ TEST_FAMILY, (byte[]) null, Action.READ));
+ } finally {
+ acl.close();
+ }
+ return null;
+ }
+ };
+
+ PrivilegedExceptionAction getPermissionsAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ TEST_TABLE);
+ protocol.getUserPermissions(TEST_TABLE);
+ } finally {
+ acl.close();
+ }
+ return null;
+ }
+ };
+
+ verifyAllowed(grantAction, SUPERUSER, USER_ADMIN, USER_OWNER);
+ verifyDenied(grantAction, USER_CREATE, USER_RW, USER_RO, USER_NONE);
+
+ verifyAllowed(revokeAction, SUPERUSER, USER_ADMIN, USER_OWNER);
+ verifyDenied(revokeAction, USER_CREATE, USER_RW, USER_RO, USER_NONE);
+
+ verifyAllowed(getPermissionsAction, SUPERUSER, USER_ADMIN, USER_OWNER);
+ verifyDenied(getPermissionsAction, USER_CREATE, USER_RW, USER_RO, USER_NONE);
+ }
+
+ @Test
+ public void testPostGrantRevoke() throws Exception {
final byte[] tableName = Bytes.toBytes("TempTable");
final byte[] family1 = Bytes.toBytes("f1");
final byte[] family2 = Bytes.toBytes("f2");
@@ -607,18 +927,13 @@ public void testGrantRevoke() throws Exception {
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor(family1));
htd.addFamily(new HColumnDescriptor(family2));
- htd.setOwnerString(USER_OWNER.getShortName());
admin.createTable(htd);
// create temp users
- User user = User.createUserForTesting(TEST_UTIL.getConfiguration(),
- "user", new String[0]);
-
- // perms only stored against the first region
- HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
- AccessControllerProtocol protocol =
- acl.coprocessorProxy(AccessControllerProtocol.class,
- tableName);
+ User tblUser = User
+ .createUserForTesting(TEST_UTIL.getConfiguration(), "tbluser", new String[0]);
+ User gblUser = User
+ .createUserForTesting(TEST_UTIL.getConfiguration(), "gbluser", new String[0]);
// prepare actions:
PrivilegedExceptionAction putActionAll = new PrivilegedExceptionAction() {
@@ -627,7 +942,11 @@ public Object run() throws Exception {
p.add(family1, qualifier, Bytes.toBytes("v1"));
p.add(family2, qualifier, Bytes.toBytes("v2"));
HTable t = new HTable(conf, tableName);
- t.put(p);
+ try {
+ t.put(p);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -636,7 +955,11 @@ public Object run() throws Exception {
Put p = new Put(Bytes.toBytes("a"));
p.add(family1, qualifier, Bytes.toBytes("v1"));
HTable t = new HTable(conf, tableName);
- t.put(p);
+ try {
+ t.put(p);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -645,7 +968,11 @@ public Object run() throws Exception {
Put p = new Put(Bytes.toBytes("a"));
p.add(family2, qualifier, Bytes.toBytes("v2"));
HTable t = new HTable(conf, tableName);
- t.put(p);
+ try {
+ t.put(p);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -655,7 +982,11 @@ public Object run() throws Exception {
g.addFamily(family1);
g.addFamily(family2);
HTable t = new HTable(conf, tableName);
- t.get(g);
+ try {
+ t.get(g);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -664,7 +995,11 @@ public Object run() throws Exception {
Get g = new Get(Bytes.toBytes("random_row"));
g.addFamily(family1);
HTable t = new HTable(conf, tableName);
- t.get(g);
+ try {
+ t.get(g);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -673,7 +1008,11 @@ public Object run() throws Exception {
Get g = new Get(Bytes.toBytes("random_row"));
g.addFamily(family2);
HTable t = new HTable(conf, tableName);
- t.get(g);
+ try {
+ t.get(g);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -683,7 +1022,11 @@ public Object run() throws Exception {
d.deleteFamily(family1);
d.deleteFamily(family2);
HTable t = new HTable(conf, tableName);
- t.delete(d);
+ try {
+ t.delete(d);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -692,7 +1035,11 @@ public Object run() throws Exception {
Delete d = new Delete(Bytes.toBytes("random_row"));
d.deleteFamily(family1);
HTable t = new HTable(conf, tableName);
- t.delete(d);
+ try {
+ t.delete(d);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -701,140 +1048,180 @@ public Object run() throws Exception {
Delete d = new Delete(Bytes.toBytes("random_row"));
d.deleteFamily(family2);
HTable t = new HTable(conf, tableName);
- t.delete(d);
+ try {
+ t.delete(d);
+ } finally {
+ t.close();
+ }
return null;
}
};
// initial check:
- verifyDenied(user, getActionAll);
- verifyDenied(user, getAction1);
- verifyDenied(user, getAction2);
+ verifyDenied(tblUser, getActionAll, getAction1, getAction2);
+ verifyDenied(tblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(tblUser, deleteActionAll, deleteAction1, deleteAction2);
- verifyDenied(user, putActionAll);
- verifyDenied(user, putAction1);
- verifyDenied(user, putAction2);
-
- verifyDenied(user, deleteActionAll);
- verifyDenied(user, deleteAction1);
- verifyDenied(user, deleteAction2);
+ verifyDenied(gblUser, getActionAll, getAction1, getAction2);
+ verifyDenied(gblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(gblUser, deleteActionAll, deleteAction1, deleteAction2);
// grant table read permission
- protocol.grant(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, null, Permission.Action.READ));
- Thread.sleep(100);
- // check
- verifyAllowed(user, getActionAll);
- verifyAllowed(user, getAction1);
- verifyAllowed(user, getAction2);
-
- verifyDenied(user, putActionAll);
- verifyDenied(user, putAction1);
- verifyDenied(user, putAction2);
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(new UserPermission(Bytes.toBytes(tblUser.getShortName()), tableName, null,
+ Permission.Action.READ));
+ protocol.grant(new UserPermission(Bytes.toBytes(gblUser.getShortName()),
+ Permission.Action.READ));
+ } finally {
+ acl.close();
+ }
- verifyDenied(user, deleteActionAll);
- verifyDenied(user, deleteAction1);
- verifyDenied(user, deleteAction2);
+ Thread.sleep(100);
+
+ // check
+ verifyAllowed(tblUser, getActionAll, getAction1, getAction2);
+ verifyDenied(tblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(tblUser, deleteActionAll, deleteAction1, deleteAction2);
+
+ verifyAllowed(gblUser, getActionAll, getAction1, getAction2);
+ verifyDenied(gblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(gblUser, deleteActionAll, deleteAction1, deleteAction2);
// grant table write permission
- protocol.grant(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, null, Permission.Action.WRITE));
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(new UserPermission(Bytes.toBytes(tblUser.getShortName()), tableName, null,
+ Permission.Action.WRITE));
+ protocol.grant(new UserPermission(Bytes.toBytes(gblUser.getShortName()),
+ Permission.Action.WRITE));
+ } finally {
+ acl.close();
+ }
+
Thread.sleep(100);
- verifyDenied(user, getActionAll);
- verifyDenied(user, getAction1);
- verifyDenied(user, getAction2);
- verifyAllowed(user, putActionAll);
- verifyAllowed(user, putAction1);
- verifyAllowed(user, putAction2);
+ verifyDenied(tblUser, getActionAll, getAction1, getAction2);
+ verifyAllowed(tblUser, putActionAll, putAction1, putAction2);
+ verifyAllowed(tblUser, deleteActionAll, deleteAction1, deleteAction2);
- verifyAllowed(user, deleteActionAll);
- verifyAllowed(user, deleteAction1);
- verifyAllowed(user, deleteAction2);
+ verifyDenied(gblUser, getActionAll, getAction1, getAction2);
+ verifyAllowed(gblUser, putActionAll, putAction1, putAction2);
+ verifyAllowed(gblUser, deleteActionAll, deleteAction1, deleteAction2);
// revoke table permission
- protocol.grant(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, null, Permission.Action.READ,
- Permission.Action.WRITE));
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(new UserPermission(Bytes.toBytes(tblUser.getShortName()), tableName, null,
+ Permission.Action.READ, Permission.Action.WRITE));
+ protocol.revoke(new UserPermission(Bytes.toBytes(tblUser.getShortName()), tableName, null));
+ protocol.revoke(new UserPermission(Bytes.toBytes(gblUser.getShortName())));
+ } finally {
+ acl.close();
+ }
- protocol.revoke(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, null));
Thread.sleep(100);
- verifyDenied(user, getActionAll);
- verifyDenied(user, getAction1);
- verifyDenied(user, getAction2);
- verifyDenied(user, putActionAll);
- verifyDenied(user, putAction1);
- verifyDenied(user, putAction2);
+ verifyDenied(tblUser, getActionAll, getAction1, getAction2);
+ verifyDenied(tblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(tblUser, deleteActionAll, deleteAction1, deleteAction2);
- verifyDenied(user, deleteActionAll);
- verifyDenied(user, deleteAction1);
- verifyDenied(user, deleteAction2);
+ verifyDenied(gblUser, getActionAll, getAction1, getAction2);
+ verifyDenied(gblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(gblUser, deleteActionAll, deleteAction1, deleteAction2);
// grant column family read permission
- protocol.grant(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, family1, Permission.Action.READ));
- Thread.sleep(100);
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(new UserPermission(Bytes.toBytes(tblUser.getShortName()), tableName, family1,
+ Permission.Action.READ));
+ protocol.grant(new UserPermission(Bytes.toBytes(gblUser.getShortName()),
+ Permission.Action.READ));
+ } finally {
+ acl.close();
+ }
- verifyAllowed(user, getActionAll);
- verifyAllowed(user, getAction1);
- verifyDenied(user, getAction2);
+ Thread.sleep(100);
- verifyDenied(user, putActionAll);
- verifyDenied(user, putAction1);
- verifyDenied(user, putAction2);
+ // Access should be denied for family2
+ verifyAllowed(tblUser, getActionAll, getAction1);
+ verifyDenied(tblUser, getAction2);
+ verifyDenied(tblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(tblUser, deleteActionAll, deleteAction1, deleteAction2);
- verifyDenied(user, deleteActionAll);
- verifyDenied(user, deleteAction1);
- verifyDenied(user, deleteAction2);
+ verifyAllowed(gblUser, getActionAll, getAction1, getAction2);
+ verifyDenied(gblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(gblUser, deleteActionAll, deleteAction1, deleteAction2);
// grant column family write permission
- protocol.grant(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, family2, Permission.Action.WRITE));
- Thread.sleep(100);
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(new UserPermission(Bytes.toBytes(tblUser.getShortName()), tableName, family2,
+ Permission.Action.WRITE));
+ protocol.grant(new UserPermission(Bytes.toBytes(gblUser.getShortName()),
+ Permission.Action.WRITE));
+ } finally {
+ acl.close();
+ }
- verifyAllowed(user, getActionAll);
- verifyAllowed(user, getAction1);
- verifyDenied(user, getAction2);
+ Thread.sleep(100);
- verifyDenied(user, putActionAll);
- verifyDenied(user, putAction1);
- verifyAllowed(user, putAction2);
+ // READ from family1, WRITE to family2 are allowed
+ verifyAllowed(tblUser, getActionAll, getAction1);
+ verifyAllowed(tblUser, putAction2, deleteAction2);
+ verifyDenied(tblUser, getAction2);
+ verifyDenied(tblUser, putActionAll, putAction1);
+ verifyDenied(tblUser, deleteActionAll, deleteAction1);
- verifyDenied(user, deleteActionAll);
- verifyDenied(user, deleteAction1);
- verifyAllowed(user, deleteAction2);
+ verifyDenied(gblUser, getActionAll, getAction1, getAction2);
+ verifyAllowed(gblUser, putActionAll, putAction1, putAction2);
+ verifyAllowed(gblUser, deleteActionAll, deleteAction1, deleteAction2);
// revoke column family permission
- protocol.revoke(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, family2));
- Thread.sleep(100);
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.revoke(new UserPermission(Bytes.toBytes(tblUser.getShortName()), tableName, family2));
+ protocol.revoke(new UserPermission(Bytes.toBytes(gblUser.getShortName())));
+ } finally {
+ acl.close();
+ }
- verifyAllowed(user, getActionAll);
- verifyAllowed(user, getAction1);
- verifyDenied(user, getAction2);
+ Thread.sleep(100);
- verifyDenied(user, putActionAll);
- verifyDenied(user, putAction1);
- verifyDenied(user, putAction2);
+ // Revoke on family2 should not have impact on family1 permissions
+ verifyAllowed(tblUser, getActionAll, getAction1);
+ verifyDenied(tblUser, getAction2);
+ verifyDenied(tblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(tblUser, deleteActionAll, deleteAction1, deleteAction2);
- verifyDenied(user, deleteActionAll);
- verifyDenied(user, deleteAction1);
- verifyDenied(user, deleteAction2);
+ // Should not have access as global permissions are completely revoked
+ verifyDenied(gblUser, getActionAll, getAction1, getAction2);
+ verifyDenied(gblUser, putActionAll, putAction1, putAction2);
+ verifyDenied(gblUser, deleteActionAll, deleteAction1, deleteAction2);
// delete table
admin.disableTable(tableName);
admin.deleteTable(tableName);
}
- private boolean hasFoundUserPermission(UserPermission userPermission,
- List<UserPermission> perms) {
+ private boolean hasFoundUserPermission(UserPermission userPermission, List<UserPermission> perms) {
return perms.contains(userPermission);
}
@Test
- public void testGrantRevokeAtQualifierLevel() throws Exception {
+ public void testPostGrantRevokeAtQualifierLevel() throws Exception {
final byte[] tableName = Bytes.toBytes("testGrantRevokeAtQualifierLevel");
final byte[] family1 = Bytes.toBytes("f1");
final byte[] family2 = Bytes.toBytes("f2");
@@ -842,7 +1229,6 @@ public void testGrantRevokeAtQualifierLevel() throws Exception {
// create table
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
-
if (admin.tableExists(tableName)) {
admin.disableTable(tableName);
admin.deleteTable(tableName);
@@ -850,23 +1236,21 @@ public void testGrantRevokeAtQualifierLevel() throws Exception {
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor(family1));
htd.addFamily(new HColumnDescriptor(family2));
- htd.setOwnerString(USER_OWNER.getShortName());
admin.createTable(htd);
// create temp users
- User user = User.createUserForTesting(TEST_UTIL.getConfiguration(),
- "user", new String[0]);
-
- HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
- AccessControllerProtocol protocol =
- acl.coprocessorProxy(AccessControllerProtocol.class, tableName);
+ User user = User.createUserForTesting(TEST_UTIL.getConfiguration(), "user", new String[0]);
PrivilegedExceptionAction getQualifierAction = new PrivilegedExceptionAction() {
public Object run() throws Exception {
Get g = new Get(Bytes.toBytes("random_row"));
g.addColumn(family1, qualifier);
HTable t = new HTable(conf, tableName);
- t.get(g);
+ try {
+ t.get(g);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -875,7 +1259,11 @@ public Object run() throws Exception {
Put p = new Put(Bytes.toBytes("random_row"));
p.add(family1, qualifier, Bytes.toBytes("v1"));
HTable t = new HTable(conf, tableName);
- t.put(p);
+ try {
+ t.put(p);
+ } finally {
+ t.close();
+ }
return null;
}
};
@@ -883,22 +1271,40 @@ public Object run() throws Exception {
public Object run() throws Exception {
Delete d = new Delete(Bytes.toBytes("random_row"));
d.deleteColumn(family1, qualifier);
- //d.deleteFamily(family1);
+ // d.deleteFamily(family1);
HTable t = new HTable(conf, tableName);
- t.delete(d);
+ try {
+ t.delete(d);
+ } finally {
+ t.close();
+ }
return null;
}
};
- protocol.revoke(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, family1));
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.revoke(new UserPermission(Bytes.toBytes(user.getShortName()), tableName, family1));
+ } finally {
+ acl.close();
+ }
+
verifyDenied(user, getQualifierAction);
verifyDenied(user, putQualifierAction);
verifyDenied(user, deleteQualifierAction);
- protocol.grant(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, family1, qualifier,
- Permission.Action.READ));
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(new UserPermission(Bytes.toBytes(user.getShortName()), tableName, family1,
+ qualifier, Permission.Action.READ));
+ } finally {
+ acl.close();
+ }
+
Thread.sleep(100);
verifyAllowed(user, getQualifierAction);
@@ -907,9 +1313,16 @@ public Object run() throws Exception {
// only grant write permission
// TODO: comment this portion after HBASE-3583
- protocol.grant(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, family1, qualifier,
- Permission.Action.WRITE));
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(new UserPermission(Bytes.toBytes(user.getShortName()), tableName, family1,
+ qualifier, Permission.Action.WRITE));
+ } finally {
+ acl.close();
+ }
+
Thread.sleep(100);
verifyDenied(user, getQualifierAction);
@@ -917,9 +1330,16 @@ public Object run() throws Exception {
verifyAllowed(user, deleteQualifierAction);
// grant both read and write permission.
- protocol.grant(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, family1, qualifier,
- Permission.Action.READ, Permission.Action.WRITE));
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(new UserPermission(Bytes.toBytes(user.getShortName()), tableName, family1,
+ qualifier, Permission.Action.READ, Permission.Action.WRITE));
+ } finally {
+ acl.close();
+ }
+
Thread.sleep(100);
verifyAllowed(user, getQualifierAction);
@@ -927,8 +1347,16 @@ public Object run() throws Exception {
verifyAllowed(user, deleteQualifierAction);
// revoke family level permission won't impact column level.
- protocol.revoke(Bytes.toBytes(user.getShortName()),
- new TablePermission(tableName, family1, qualifier));
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.revoke(new UserPermission(Bytes.toBytes(user.getShortName()), tableName, family1,
+ qualifier));
+ } finally {
+ acl.close();
+ }
+
Thread.sleep(100);
verifyDenied(user, getQualifierAction);
@@ -957,113 +1385,160 @@ public void testPermissionList() throws Exception {
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor(family1));
htd.addFamily(new HColumnDescriptor(family2));
- htd.setOwnerString(USER_OWNER.getShortName());
+ htd.setOwner(USER_OWNER);
admin.createTable(htd);
+ List<UserPermission> perms;
HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
- AccessControllerProtocol protocol =
- acl.coprocessorProxy(AccessControllerProtocol.class, tableName);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ perms = protocol.getUserPermissions(tableName);
+ } finally {
+ acl.close();
+ }
- List perms = protocol.getUserPermissions(tableName);
+ UserPermission ownerperm = new UserPermission(Bytes.toBytes(USER_OWNER.getName()), tableName,
+ null, Action.values());
+ assertTrue("Owner should have all permissions on table",
+ hasFoundUserPermission(ownerperm, perms));
- UserPermission up = new UserPermission(user,
- tableName, family1, qualifier, Permission.Action.READ);
+ UserPermission up = new UserPermission(user, tableName, family1, qualifier,
+ Permission.Action.READ);
assertFalse("User should not be granted permission: " + up.toString(),
- hasFoundUserPermission(up, perms));
+ hasFoundUserPermission(up, perms));
// grant read permission
- UserPermission upToSet = new UserPermission(user,
- tableName, family1, qualifier, Permission.Action.READ);
- protocol.grant(user, upToSet);
- perms = protocol.getUserPermissions(tableName);
+ UserPermission upToSet = new UserPermission(user, tableName, family1, qualifier,
+ Permission.Action.READ);
+
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(upToSet);
+ perms = protocol.getUserPermissions(tableName);
+ } finally {
+ acl.close();
+ }
- UserPermission upToVerify = new UserPermission(user,
- tableName, family1, qualifier, Permission.Action.READ);
+ UserPermission upToVerify = new UserPermission(user, tableName, family1, qualifier,
+ Permission.Action.READ);
assertTrue("User should be granted permission: " + upToVerify.toString(),
- hasFoundUserPermission(upToVerify, perms));
+ hasFoundUserPermission(upToVerify, perms));
- upToVerify = new UserPermission(user, tableName, family1, qualifier,
- Permission.Action.WRITE);
+ upToVerify = new UserPermission(user, tableName, family1, qualifier, Permission.Action.WRITE);
assertFalse("User should not be granted permission: " + upToVerify.toString(),
- hasFoundUserPermission(upToVerify, perms));
+ hasFoundUserPermission(upToVerify, perms));
// grant read+write
- upToSet = new UserPermission(user, tableName, family1, qualifier,
- Permission.Action.WRITE, Permission.Action.READ);
- protocol.grant(user, upToSet);
- perms = protocol.getUserPermissions(tableName);
+ upToSet = new UserPermission(user, tableName, family1, qualifier, Permission.Action.WRITE,
+ Permission.Action.READ);
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.grant(upToSet);
+ perms = protocol.getUserPermissions(tableName);
+ } finally {
+ acl.close();
+ }
- upToVerify = new UserPermission(user, tableName, family1, qualifier,
- Permission.Action.WRITE, Permission.Action.READ);
+ upToVerify = new UserPermission(user, tableName, family1, qualifier, Permission.Action.WRITE,
+ Permission.Action.READ);
assertTrue("User should be granted permission: " + upToVerify.toString(),
- hasFoundUserPermission(upToVerify, perms));
+ hasFoundUserPermission(upToVerify, perms));
+
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ protocol.revoke(upToSet);
+ perms = protocol.getUserPermissions(tableName);
+ } finally {
+ acl.close();
+ }
- protocol.revoke(user, upToSet);
- perms = protocol.getUserPermissions(tableName);
assertFalse("User should not be granted permission: " + upToVerify.toString(),
hasFoundUserPermission(upToVerify, perms));
- // delete table
+ // disable table before modification
admin.disableTable(tableName);
+
+ User newOwner = User.createUserForTesting(conf, "new_owner", new String[] {});
+ htd.setOwner(newOwner);
+ admin.modifyTable(tableName, htd);
+
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ tableName);
+ perms = protocol.getUserPermissions(tableName);
+ } finally {
+ acl.close();
+ }
+
+ UserPermission newOwnerperm = new UserPermission(Bytes.toBytes(newOwner.getName()), tableName,
+ null, Action.values());
+ assertTrue("New owner should have all permissions on table",
+ hasFoundUserPermission(newOwnerperm, perms));
+
+ // delete table
admin.deleteTable(tableName);
}
- /** global operations*/
+ /** global operations */
private void verifyGlobal(PrivilegedExceptionAction<?> action) throws Exception {
- // should be allowed
- verifyAllowed(SUPERUSER, action);
-
- // should be denied
- verifyDenied(USER_OWNER, action);
- verifyDenied(USER_RW, action);
- verifyDenied(USER_NONE, action);
- verifyDenied(USER_RO, action);
+ verifyAllowed(action, SUPERUSER);
+
+ verifyDenied(action, USER_CREATE, USER_RW, USER_NONE, USER_RO);
}
public void checkGlobalPerms(Permission.Action... actions) throws IOException {
- HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
- AccessControllerProtocol protocol =
- acl.coprocessorProxy(AccessControllerProtocol.class, new byte[0]);
-
Permission[] perms = new Permission[actions.length];
- for (int i=0; i < actions.length; i++) {
+ for (int i = 0; i < actions.length; i++) {
perms[i] = new Permission(actions[i]);
}
-
- protocol.checkPermissions(perms);
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ new byte[0]);
+ protocol.checkPermissions(perms);
+ } finally {
+ acl.close();
+ }
}
public void checkTablePerms(byte[] table, byte[] family, byte[] column,
Permission.Action... actions) throws IOException {
Permission[] perms = new Permission[actions.length];
- for (int i=0; i < actions.length; i++) {
+ for (int i = 0; i < actions.length; i++) {
perms[i] = new TablePermission(table, family, column, actions[i]);
}
checkTablePerms(table, perms);
}
- public void checkTablePerms(byte[] table, Permission...perms) throws IOException {
+ public void checkTablePerms(byte[] table, Permission... perms) throws IOException {
HTable acl = new HTable(conf, table);
- AccessControllerProtocol protocol =
- acl.coprocessorProxy(AccessControllerProtocol.class, new byte[0]);
-
- protocol.checkPermissions(perms);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ new byte[0]);
+ protocol.checkPermissions(perms);
+ } finally {
+ acl.close();
+ }
}
- public void grant(AccessControllerProtocol protocol, User user, byte[] t, byte[] f,
- byte[] q, Permission.Action... actions) throws IOException {
- protocol.grant(Bytes.toBytes(user.getShortName()), new TablePermission(t, f, q, actions));
+ public void grant(AccessControllerProtocol protocol, User user, byte[] t, byte[] f, byte[] q,
+ Permission.Action... actions) throws IOException {
+ protocol.grant(new UserPermission(Bytes.toBytes(user.getShortName()), t, f, q, actions));
}
@Test
public void testCheckPermissions() throws Exception {
- final HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
- final AccessControllerProtocol protocol =
- acl.coprocessorProxy(AccessControllerProtocol.class, TEST_TABLE);
-
- //--------------------------------------
- //test global permissions
+ // --------------------------------------
+ // test global permissions
PrivilegedExceptionAction globalAdmin = new PrivilegedExceptionAction() {
@Override
public Void run() throws Exception {
@@ -1071,11 +1546,11 @@ public Void run() throws Exception {
return null;
}
};
- //verify that only superuser can admin
+ // verify that only superuser can admin
verifyGlobal(globalAdmin);
- //--------------------------------------
- //test multiple permissions
+ // --------------------------------------
+ // test multiple permissions
PrivilegedExceptionAction globalReadWrite = new PrivilegedExceptionAction() {
@Override
public Void run() throws Exception {
@@ -1086,8 +1561,8 @@ public Void run() throws Exception {
verifyGlobal(globalReadWrite);
- //--------------------------------------
- //table/column/qualifier level permissions
+ // --------------------------------------
+ // table/column/qualifier level permissions
final byte[] TEST_Q1 = Bytes.toBytes("q1");
final byte[] TEST_Q2 = Bytes.toBytes("q2");
@@ -1095,9 +1570,16 @@ public Void run() throws Exception {
User userColumn = User.createUserForTesting(conf, "user_check_perms_family", new String[0]);
User userQualifier = User.createUserForTesting(conf, "user_check_perms_q", new String[0]);
- grant(protocol, userTable, TEST_TABLE, null, null, Permission.Action.READ);
- grant(protocol, userColumn, TEST_TABLE, TEST_FAMILY, null, Permission.Action.READ);
- grant(protocol, userQualifier, TEST_TABLE, TEST_FAMILY, TEST_Q1, Permission.Action.READ);
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ TEST_TABLE);
+ grant(protocol, userTable, TEST_TABLE, null, null, Permission.Action.READ);
+ grant(protocol, userColumn, TEST_TABLE, TEST_FAMILY, null, Permission.Action.READ);
+ grant(protocol, userQualifier, TEST_TABLE, TEST_FAMILY, TEST_Q1, Permission.Action.READ);
+ } finally {
+ acl.close();
+ }
PrivilegedExceptionAction tableRead = new PrivilegedExceptionAction() {
@Override
@@ -1127,9 +1609,8 @@ public Void run() throws Exception {
@Override
public Void run() throws Exception {
checkTablePerms(TEST_TABLE, new Permission[] {
- new TablePermission(TEST_TABLE, TEST_FAMILY, TEST_Q1, Permission.Action.READ),
- new TablePermission(TEST_TABLE, TEST_FAMILY, TEST_Q2, Permission.Action.READ),
- });
+ new TablePermission(TEST_TABLE, TEST_FAMILY, TEST_Q1, Permission.Action.READ),
+ new TablePermission(TEST_TABLE, TEST_FAMILY, TEST_Q2, Permission.Action.READ), });
return null;
}
};
@@ -1137,10 +1618,8 @@ public Void run() throws Exception {
PrivilegedExceptionAction globalAndTableRead = new PrivilegedExceptionAction() {
@Override
public Void run() throws Exception {
- checkTablePerms(TEST_TABLE, new Permission[] {
- new Permission(Permission.Action.READ),
- new TablePermission(TEST_TABLE, null, (byte[])null, Permission.Action.READ),
- });
+ checkTablePerms(TEST_TABLE, new Permission[] { new Permission(Permission.Action.READ),
+ new TablePermission(TEST_TABLE, null, (byte[]) null, Permission.Action.READ), });
return null;
}
};
@@ -1169,31 +1648,465 @@ public Void run() throws Exception {
verifyAllowed(noCheck, SUPERUSER, userTable, userColumn, userQualifier);
- //--------------------------------------
- //test family level multiple permissions
+ // --------------------------------------
+ // test family level multiple permissions
PrivilegedExceptionAction familyReadWrite = new PrivilegedExceptionAction() {
@Override
public Void run() throws Exception {
checkTablePerms(TEST_TABLE, TEST_FAMILY, null, Permission.Action.READ,
- Permission.Action.WRITE);
+ Permission.Action.WRITE);
return null;
}
};
- // should be allowed
+
verifyAllowed(familyReadWrite, SUPERUSER, USER_OWNER, USER_RW);
- // should be denied
- verifyDenied(familyReadWrite, USER_NONE, USER_RO);
+ verifyDenied(familyReadWrite, USER_NONE, USER_CREATE, USER_RO);
- //--------------------------------------
- //check for wrong table region
+ // --------------------------------------
+ // check for wrong table region
+ acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
try {
- //but ask for TablePermissions for TEST_TABLE
- protocol.checkPermissions(new Permission[] {(Permission) new TablePermission(
- TEST_TABLE, null, (byte[])null, Permission.Action.CREATE)});
- fail("this should have thrown CoprocessorException");
- } catch(CoprocessorException ex) {
- //expected
+ AccessControllerProtocol protocol = acl.coprocessorProxy(AccessControllerProtocol.class,
+ TEST_TABLE);
+ try {
+ // but ask for TablePermissions for TEST_TABLE
+ protocol.checkPermissions(new Permission[] { (Permission) new TablePermission(TEST_TABLE,
+ null, (byte[]) null, Permission.Action.CREATE) });
+ fail("this should have thrown CoprocessorException");
+ } catch (CoprocessorException ex) {
+ // expected
+ }
+ } finally {
+ acl.close();
}
+ }
+
+ @Test
+ public void testLockAction() throws Exception {
+ PrivilegedExceptionAction lockAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preLockRow(ObserverContext.createAndPrepare(RCP_ENV, null), null,
+ Bytes.toBytes("random_row"));
+ return null;
+ }
+ };
+ verifyAllowed(lockAction, SUPERUSER, USER_ADMIN, USER_OWNER, USER_CREATE, USER_RW_ON_TABLE);
+ verifyDenied(lockAction, USER_RO, USER_RW, USER_NONE);
+ }
+
+ @Test
+ public void testUnLockAction() throws Exception {
+ PrivilegedExceptionAction unLockAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preUnlockRow(ObserverContext.createAndPrepare(RCP_ENV, null), null,
+ 123456);
+ return null;
+ }
+ };
+ verifyAllowed(unLockAction, SUPERUSER, USER_ADMIN, USER_OWNER, USER_RW_ON_TABLE);
+ verifyDenied(unLockAction, USER_NONE, USER_RO, USER_RW);
+ }
+
+ @Test
+ public void testStopRegionServer() throws Exception {
+ PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preStopRegionServer(ObserverContext.createAndPrepare(RSCP_ENV, null));
+ return null;
+ }
+ };
+ verifyAllowed(action, SUPERUSER, USER_ADMIN);
+ verifyDenied(action, USER_CREATE, USER_OWNER, USER_RW, USER_RO, USER_NONE);
}
+
+ @Test
+ public void testOpenRegion() throws Exception {
+ PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preOpen(ObserverContext.createAndPrepare(RCP_ENV, null));
+ return null;
+ }
+ };
+
+ verifyAllowed(action, SUPERUSER, USER_ADMIN);
+ verifyDenied(action, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+ }
+
+ @Test
+ public void testCloseRegion() throws Exception {
+ PrivilegedExceptionAction action = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preClose(ObserverContext.createAndPrepare(RCP_ENV, null), false);
+ return null;
+ }
+ };
+
+ verifyAllowed(action, SUPERUSER, USER_ADMIN);
+ verifyDenied(action, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+ }
+
+
+ @Test
+ public void testSnapshot() throws Exception {
+ PrivilegedExceptionAction snapshotAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preSnapshot(ObserverContext.createAndPrepare(CP_ENV, null),
+ null, null);
+ return null;
+ }
+ };
+
+ PrivilegedExceptionAction deleteAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preDeleteSnapshot(ObserverContext.createAndPrepare(CP_ENV, null),
+ null);
+ return null;
+ }
+ };
+
+ PrivilegedExceptionAction restoreAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preRestoreSnapshot(ObserverContext.createAndPrepare(CP_ENV, null),
+ null, null);
+ return null;
+ }
+ };
+
+ PrivilegedExceptionAction cloneAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ ACCESS_CONTROLLER.preCloneSnapshot(ObserverContext.createAndPrepare(CP_ENV, null),
+ null, null);
+ return null;
+ }
+ };
+
+ verifyAllowed(snapshotAction, SUPERUSER, USER_ADMIN);
+ verifyDenied(snapshotAction, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+
+ verifyAllowed(cloneAction, SUPERUSER, USER_ADMIN);
+ verifyDenied(deleteAction, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+
+ verifyAllowed(restoreAction, SUPERUSER, USER_ADMIN);
+ verifyDenied(restoreAction, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+
+ verifyAllowed(deleteAction, SUPERUSER, USER_ADMIN);
+ verifyDenied(cloneAction, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+ }
+
+ @Test
+ public void testGlobalAuthorizationForNewRegisteredRS() throws Exception {
+ LOG.debug("Test for global authorization for a new registered RegionServer.");
+ MiniHBaseCluster hbaseCluster = TEST_UTIL.getHBaseCluster();
+ final HRegionServer oldRs = hbaseCluster.getRegionServer(0);
+
+ // Since each RegionServer runs as a different user, add global
+ // permissions for the new user.
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(
+ AccessControllerProtocol.class, TEST_TABLE);
+ String currentUser = User.getCurrent().getShortName();
+ // User name for the new RegionServer we plan to add.
+ String activeUserForNewRs = currentUser + ".hfs."
+ + hbaseCluster.getLiveRegionServerThreads().size();
+
+ protocol.grant(new UserPermission(Bytes.toBytes(activeUserForNewRs),
+ Permission.Action.ADMIN, Permission.Action.CREATE,
+ Permission.Action.READ, Permission.Action.WRITE));
+
+ } finally {
+ acl.close();
+ }
+ final HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+ HTableDescriptor htd = new HTableDescriptor(TEST_TABLE2);
+ htd.addFamily(new HColumnDescriptor(TEST_FAMILY));
+ htd.setOwner(USER_OWNER);
+ admin.createTable(htd);
+
+ // Starting a new RegionServer.
+ JVMClusterUtil.RegionServerThread newRsThread = hbaseCluster
+ .startRegionServer();
+ final HRegionServer newRs = newRsThread.getRegionServer();
+
+ // Move region to the new RegionServer.
+ final HTable table = new HTable(TEST_UTIL.getConfiguration(), TEST_TABLE2);
+ try {
+ NavigableMap<HRegionInfo, ServerName> regions = table
+ .getRegionLocations();
+ final Map.Entry<HRegionInfo, ServerName> firstRegion = regions.entrySet()
+ .iterator().next();
+
+ PrivilegedExceptionAction moveAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ admin.move(firstRegion.getKey().getEncodedNameAsBytes(),
+ Bytes.toBytes(newRs.getServerName().getServerName()));
+ return null;
+ }
+ };
+ SUPERUSER.runAs(moveAction);
+
+ final int RETRIES_LIMIT = 10;
+ int retries = 0;
+ while (newRs.getOnlineRegions(TEST_TABLE2).size() < 1 && retries < RETRIES_LIMIT) {
+ LOG.debug("Waiting for region to be opened. Already retried " + retries
+ + " times.");
+ try {
+ Thread.sleep(1000);
+ } catch (InterruptedException e) {
+ }
+ retries++;
+ if (retries == RETRIES_LIMIT - 1) {
+ fail("Retry exhaust for waiting region to be opened.");
+ }
+ }
+ // Verify write permission for user "admin2" who has the global
+ // permissions.
+ PrivilegedExceptionAction putAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ Put put = new Put(Bytes.toBytes("test"));
+ put.add(TEST_FAMILY, Bytes.toBytes("qual"), Bytes.toBytes("value"));
+ table.put(put);
+ return null;
+ }
+ };
+ USER_ADMIN.runAs(putAction);
+ } finally {
+ table.close();
+ }
+ }
+
+ @Test
+ public void testTableDescriptorsEnumeration() throws Exception {
+ User TABLE_ADMIN = User.createUserForTesting(conf, "UserA", new String[0]);
+
+ // Grant TABLE ADMIN privs on test table to UserA
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(
+ AccessControllerProtocol.class, TEST_TABLE);
+ protocol.grant(new UserPermission(Bytes.toBytes(TABLE_ADMIN.getShortName()),
+ TEST_TABLE, null, Permission.Action.ADMIN));
+ } finally {
+ acl.close();
+ }
+
+ PrivilegedExceptionAction listTablesAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
+ try {
+ admin.listTables();
+ } finally {
+ admin.close();
+ }
+ return null;
+ }
+ };
+
+ PrivilegedExceptionAction getTableDescAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
+ try {
+ admin.getTableDescriptor(TEST_TABLE);
+ } finally {
+ admin.close();
+ }
+ return null;
+ }
+ };
+
+ verifyAllowed(listTablesAction, SUPERUSER, USER_ADMIN);
+ verifyDenied(listTablesAction, USER_CREATE, USER_RW, USER_RO, USER_NONE, TABLE_ADMIN);
+
+ verifyAllowed(getTableDescAction, SUPERUSER, USER_ADMIN, USER_CREATE, TABLE_ADMIN);
+ verifyDenied(getTableDescAction, USER_RW, USER_RO, USER_NONE);
+ }
+
+ @Test
+ public void testTableDeletion() throws Exception {
+ final User tableAdmin = User.createUserForTesting(conf, "TestUser", new String[0]);
+
+ // We need to create a new table here because we will be testing what
+ // happens when it is deleted
+ final byte[] tableName = Bytes.toBytes("testTableDeletion");
+ HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+ HTableDescriptor htd = new HTableDescriptor(tableName);
+ htd.addFamily(new HColumnDescriptor(TEST_FAMILY));
+ admin.createTable(htd);
+ TEST_UTIL.waitTableEnabled(tableName, 5000);
+
+ // Grant TABLE ADMIN privs
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(
+ AccessControllerProtocol.class, tableName);
+ protocol.grant(new UserPermission(Bytes.toBytes(tableAdmin.getShortName()),
+ tableName, null, Permission.Action.ADMIN));
+ } finally {
+ acl.close();
+ }
+
+ PrivilegedExceptionAction deleteTableAction = new PrivilegedExceptionAction() {
+ public Object run() throws Exception {
+ HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
+ try {
+ admin.disableTable(tableName);
+ admin.deleteTable(tableName);
+ } finally {
+ admin.close();
+ }
+ return null;
+ }
+ };
+
+ verifyDenied(deleteTableAction, USER_RW, USER_RO, USER_NONE);
+ verifyAllowed(deleteTableAction, tableAdmin);
+ }
+
+ @Test
+ public void testCreateWithCorrectOwner() throws Exception {
+ final byte[] tableName = Bytes.toBytes("testCreateWithCorrectOwner");
+
+ // Create a test user
+ User testUser = User.createUserForTesting(TEST_UTIL.getConfiguration(), "TestUser",
+ new String[0]);
+
+ // Grant the test user the ability to create tables
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(
+ AccessControllerProtocol.class, AccessControlLists.ACL_TABLE_NAME);
+ protocol.grant(new UserPermission(Bytes.toBytes(testUser.getShortName()),
+ Permission.Action.CREATE));
+ } finally {
+ acl.close();
+ }
+
+ verifyAllowed(new PrivilegedExceptionAction() {
+ @Override
+ public Object run() throws Exception {
+ HTableDescriptor desc = new HTableDescriptor(tableName);
+ desc.addFamily(new HColumnDescriptor(TEST_FAMILY));
+ HBaseAdmin admin = new HBaseAdmin(conf);
+ try {
+ admin.createTable(desc);
+ } finally {
+ admin.close();
+ }
+ return null;
+ }
+ }, testUser);
+ TEST_UTIL.waitTableEnabled(tableName, 5000);
+
+ // Verify that owner permissions have been granted to the test user on the
+ // table just created
+ List<TablePermission> perms = AccessControlLists.getTablePermissions(conf, tableName)
+ .get(testUser.getShortName());
+ assertNotNull(perms);
+ assertFalse(perms.isEmpty());
+ // Should be RWXCA
+ assertTrue(perms.get(0).implies(Permission.Action.READ));
+ assertTrue(perms.get(0).implies(Permission.Action.WRITE));
+ assertTrue(perms.get(0).implies(Permission.Action.EXEC));
+ assertTrue(perms.get(0).implies(Permission.Action.CREATE));
+ assertTrue(perms.get(0).implies(Permission.Action.ADMIN));
+ }
+
+ @Test
+ public void testACLTableAccess() throws Exception {
+ final Configuration conf = TEST_UTIL.getConfiguration();
+
+ final byte[] tableName = Bytes.toBytes("testACLTableAccess");
+ HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+ HTableDescriptor htd = new HTableDescriptor(tableName);
+ htd.addFamily(new HColumnDescriptor(TEST_FAMILY));
+ admin.createTable(htd);
+ TEST_UTIL.waitTableEnabled(tableName, 5000);
+
+ // Global users
+ User globalRead = User.createUserForTesting(conf, "globalRead", new String[0]);
+ User globalWrite = User.createUserForTesting(conf, "globalWrite", new String[0]);
+ User globalCreate = User.createUserForTesting(conf, "globalCreate", new String[0]);
+ User globalAdmin = User.createUserForTesting(conf, "globalAdmin", new String[0]);
+
+ // Table users
+ User tableRead = User.createUserForTesting(conf, "tableRead", new String[0]);
+ User tableWrite = User.createUserForTesting(conf, "tableWrite", new String[0]);
+ User tableCreate = User.createUserForTesting(conf, "tableCreate", new String[0]);
+ User tableAdmin = User.createUserForTesting(conf, "tableAdmin", new String[0]);
+
+ // Set up grants
+ HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ AccessControllerProtocol protocol = acl.coprocessorProxy(
+ AccessControllerProtocol.class, AccessControlLists.ACL_TABLE_NAME);
+ protocol.grant(new UserPermission(Bytes.toBytes(globalRead.getShortName()), Action.READ));
+ protocol.grant(new UserPermission(Bytes.toBytes(globalWrite.getShortName()), Action.WRITE));
+ protocol.grant(new UserPermission(Bytes.toBytes(globalCreate.getShortName()),
+ Action.CREATE));
+ protocol.grant(new UserPermission(Bytes.toBytes(globalAdmin.getShortName()),
+ Action.ADMIN));
+ protocol.grant(new UserPermission(Bytes.toBytes(tableRead.getShortName()), tableName,
+ null, Action.READ));
+ protocol.grant(new UserPermission(Bytes.toBytes(tableWrite.getShortName()), tableName,
+ null, Action.WRITE));
+ protocol.grant(new UserPermission(Bytes.toBytes(tableCreate.getShortName()), tableName,
+ null, Action.CREATE));
+ protocol.grant(new UserPermission(Bytes.toBytes(tableAdmin.getShortName()), tableName,
+ null, Action.ADMIN));
+ } finally {
+ acl.close();
+ }
+
+ // Write tests
+
+ PrivilegedExceptionAction writeAction = new PrivilegedExceptionAction() {
+ @Override
+ public Object run() throws Exception {
+ HTable t = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ t.put(new Put(Bytes.toBytes("test")).add(AccessControlLists.ACL_LIST_FAMILY,
+ Bytes.toBytes("q"), Bytes.toBytes("value")));
+ return null;
+ } finally {
+ t.close();
+ }
+ }
+ };
+
+ // All writes to ACL table denied except for GLOBAL WRITE permission and superuser
+
+ verifyDenied(writeAction, globalAdmin, globalCreate, globalRead);
+ verifyDenied(writeAction, tableAdmin, tableCreate, tableRead, tableWrite);
+ verifyAllowed(writeAction, SUPERUSER, globalWrite);
+
+ // Read tests
+
+ PrivilegedExceptionAction scanAction = new PrivilegedExceptionAction() {
+ @Override
+ public Object run() throws Exception {
+ HTable t = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
+ try {
+ ResultScanner s = t.getScanner(new Scan());
+ try {
+ for (Result r = s.next(); r != null; r = s.next()) {
+ // do nothing
+ }
+ } finally {
+ s.close();
+ }
+ return null;
+ } finally {
+ t.close();
+ }
+ }
+ };
+
+ // All reads from ACL table denied except for GLOBAL READ and superuser
+
+ verifyDenied(scanAction, globalAdmin, globalCreate, globalWrite);
+ verifyDenied(scanAction, tableCreate, tableAdmin, tableRead, tableWrite);
+ verifyAllowed(scanAction, SUPERUSER, globalRead);
+ }
+
}
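
Every grant and revoke in the TestAccessController changes above follows the same shape: open an HTable on the ACL table, take an AccessControllerProtocol coprocessor proxy keyed by the target table, call grant or revoke with a UserPermission (table/family scoped when a table name is supplied, global when it is not), and close the table in a finally block. A minimal sketch of that flow, assuming the 0.94-era classes used in this patch and the imports already present in the test; the helper name grantRead is illustrative only:

  // Illustrative helper, not part of the patch: grants READ on one column family
  // by writing through the AccessController coprocessor on the ACL table.
  private static void grantRead(Configuration conf, User user, byte[] tableName, byte[] family)
      throws IOException {
    HTable acl = new HTable(conf, AccessControlLists.ACL_TABLE_NAME);
    try {
      AccessControllerProtocol protocol =
          acl.coprocessorProxy(AccessControllerProtocol.class, tableName);
      // Supplying table and family scopes the permission; a UserPermission built
      // from only a user name and actions would be a global grant instead.
      protocol.grant(new UserPermission(Bytes.toBytes(user.getShortName()), tableName, family,
          Permission.Action.READ));
    } finally {
      acl.close();
    }
  }

The tests then pause briefly (Thread.sleep(100)) before asserting, presumably to give the permission change time to propagate.
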
diff --git a/security/src/test/java/org/apache/hadoop/hbase/security/access/TestTablePermissions.java b/security/src/test/java/org/apache/hadoop/hbase/security/access/TestTablePermissions.java
index 39fc73e78985..a3a0f8a9610e 100644
--- a/security/src/test/java/org/apache/hadoop/hbase/security/access/TestTablePermissions.java
+++ b/security/src/test/java/org/apache/hadoop/hbase/security/access/TestTablePermissions.java
@@ -42,8 +42,10 @@
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -87,6 +89,10 @@ public static void beforeClass() throws Exception {
SecureTestUtil.enableSecurity(conf);
UTIL.startMiniCluster();
+
+ // Wait for the ACL table to become available
+ UTIL.waitTableAvailable(AccessControlLists.ACL_TABLE_NAME, 30000);
+
ZKW = new ZooKeeperWatcher(UTIL.getConfiguration(),
"TestTablePermissions", ABORTABLE);
@@ -99,19 +105,28 @@ public static void afterClass() throws Exception {
UTIL.shutdownMiniCluster();
}
+ @After
+ public void tearDown() throws Exception {
+ Configuration conf = UTIL.getConfiguration();
+ AccessControlLists.removeTablePermissions(conf, TEST_TABLE);
+ AccessControlLists.removeTablePermissions(conf, TEST_TABLE2);
+ AccessControlLists.removeTablePermissions(conf, AccessControlLists.ACL_TABLE_NAME);
+ }
+
@Test
public void testBasicWrite() throws Exception {
Configuration conf = UTIL.getConfiguration();
// add some permissions
- AccessControlLists.addTablePermission(conf, TEST_TABLE,
- "george", new TablePermission(TEST_TABLE, null,
- TablePermission.Action.READ, TablePermission.Action.WRITE));
- AccessControlLists.addTablePermission(conf, TEST_TABLE,
- "hubert", new TablePermission(TEST_TABLE, null,
- TablePermission.Action.READ));
- AccessControlLists.addTablePermission(conf, TEST_TABLE,
- "humphrey", new TablePermission(TEST_TABLE, TEST_FAMILY, TEST_QUALIFIER,
- TablePermission.Action.READ));
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("george"), TEST_TABLE, null, (byte[])null,
+ UserPermission.Action.READ, UserPermission.Action.WRITE));
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("hubert"), TEST_TABLE, null, (byte[])null,
+ UserPermission.Action.READ));
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("humphrey"),
+ TEST_TABLE, TEST_FAMILY, TEST_QUALIFIER,
+ UserPermission.Action.READ));
// retrieve the same
ListMultimap<String, TablePermission> perms =
@@ -165,8 +180,8 @@ public void testBasicWrite() throws Exception {
assertFalse(actions.contains(TablePermission.Action.WRITE));
// table 2 permissions
- AccessControlLists.addTablePermission(conf, TEST_TABLE2, "hubert",
- new TablePermission(TEST_TABLE2, null,
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("hubert"), TEST_TABLE2, null, (byte[])null,
TablePermission.Action.READ, TablePermission.Action.WRITE));
// check full load
@@ -197,16 +212,21 @@ public void testBasicWrite() throws Exception {
@Test
public void testPersistence() throws Exception {
Configuration conf = UTIL.getConfiguration();
- AccessControlLists.addTablePermission(conf, TEST_TABLE, "albert",
- new TablePermission(TEST_TABLE, null, TablePermission.Action.READ));
- AccessControlLists.addTablePermission(conf, TEST_TABLE, "betty",
- new TablePermission(TEST_TABLE, null, TablePermission.Action.READ,
- TablePermission.Action.WRITE));
- AccessControlLists.addTablePermission(conf, TEST_TABLE, "clark",
- new TablePermission(TEST_TABLE, TEST_FAMILY, TablePermission.Action.READ));
- AccessControlLists.addTablePermission(conf, TEST_TABLE, "dwight",
- new TablePermission(TEST_TABLE, TEST_FAMILY, TEST_QUALIFIER,
- TablePermission.Action.WRITE));
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("albert"), TEST_TABLE, null,
+ (byte[])null, TablePermission.Action.READ));
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("betty"), TEST_TABLE, null,
+ (byte[])null, TablePermission.Action.READ,
+ TablePermission.Action.WRITE));
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("clark"),
+ TEST_TABLE, TEST_FAMILY,
+ TablePermission.Action.READ));
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("dwight"),
+ TEST_TABLE, TEST_FAMILY, TEST_QUALIFIER,
+ TablePermission.Action.WRITE));
// verify permissions survive changes in table metadata
ListMultimap<String, TablePermission> preperms =
@@ -313,4 +333,60 @@ public void testEquals() throws Exception {
assertFalse(p1.equals(p2));
assertFalse(p2.equals(p1));
}
+
+ @Test
+ public void testGlobalPermission() throws Exception {
+ Configuration conf = UTIL.getConfiguration();
+
+ // add some permissions
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("user1"),
+ Permission.Action.READ, Permission.Action.WRITE));
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("user2"),
+ Permission.Action.CREATE));
+ AccessControlLists.addUserPermission(conf,
+ new UserPermission(Bytes.toBytes("user3"),
+ Permission.Action.ADMIN, Permission.Action.READ, Permission.Action.CREATE));
+
+ ListMultimap<String, TablePermission> perms = AccessControlLists.getTablePermissions(conf, null);
+ List<TablePermission> user1Perms = perms.get("user1");
+ assertEquals("Should have 1 permission for user1", 1, user1Perms.size());
+ assertEquals("user1 should have READ and WRITE permissions",
+ new Permission.Action[] { Permission.Action.READ, Permission.Action.WRITE },
+ user1Perms.get(0).getActions());
+
+ List<TablePermission> user2Perms = perms.get("user2");
+ assertEquals("Should have 1 permission for user2", 1, user2Perms.size());
+ assertEquals("user2 should have CREATE permission",
+ new Permission.Action[] { Permission.Action.CREATE },
+ user2Perms.get(0).getActions());
+
+ List<TablePermission> user3Perms = perms.get("user3");
+ assertEquals("Should have 1 permission for user3", 1, user3Perms.size());
+ assertEquals("user3 should have ADMIN, READ, CREATE permission",
+ new Permission.Action[] {
+ Permission.Action.ADMIN, Permission.Action.READ, Permission.Action.CREATE
+ },
+ user3Perms.get(0).getActions());
+ }
+
+ @Test
+ public void testAuthManager() throws Exception {
+ Configuration conf = UTIL.getConfiguration();
+ /* test a race condition causing TableAuthManager to sometimes fail global permissions checks
+ * when the global cache is being updated
+ */
+ TableAuthManager authManager = TableAuthManager.get(ZKW, conf);
+ // currently running user is the system user and should have global admin perms
+ User currentUser = User.getCurrent();
+ assertTrue(authManager.authorize(currentUser, Permission.Action.ADMIN));
+ for (int i=1; i<=50; i++) {
+ AccessControlLists.addUserPermission(conf, new UserPermission(Bytes.toBytes("testauth"+i),
+ Permission.Action.ADMIN, Permission.Action.READ, Permission.Action.WRITE));
+ // make sure the system user still shows as authorized
+ assertTrue("Failed current user auth check on iter "+i,
+ authManager.authorize(currentUser, Permission.Action.ADMIN));
+ }
+ }
}
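
The TestTablePermissions changes replace the old AccessControlLists.addTablePermission(conf, table, user, TablePermission) calls with addUserPermission(conf, UserPermission), where a UserPermission constructed without a table is stored as a global grant and getTablePermissions(conf, null) reads the global set back. A minimal sketch of that round trip, assuming the same 0.94 classes plus the test's existing TEST_TABLE/TEST_FAMILY constants and imports; the method name is illustrative only:

  // Illustrative only: mirrors the addUserPermission/getTablePermissions calls used above.
  static void writeAndReadPermissions(Configuration conf) throws Exception {
    // Table-scoped grant: READ on TEST_TABLE, family TEST_FAMILY, for user "clark".
    AccessControlLists.addUserPermission(conf,
        new UserPermission(Bytes.toBytes("clark"), TEST_TABLE, TEST_FAMILY,
            TablePermission.Action.READ));
    // Global grant: no table argument, so the permission applies cluster-wide.
    AccessControlLists.addUserPermission(conf,
        new UserPermission(Bytes.toBytes("user1"),
            Permission.Action.READ, Permission.Action.WRITE));
    // A null table name returns the global permission set; a table name returns that table's set.
    ListMultimap<String, TablePermission> globalPerms =
        AccessControlLists.getTablePermissions(conf, null);
    List<TablePermission> user1Perms = globalPerms.get("user1");
    assertEquals(1, user1Perms.size());
  }
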
diff --git a/security/src/test/resources/hbase-site.xml b/security/src/test/resources/hbase-site.xml
index dcc7df2fe57b..4f1dd5f15d0c 100644
--- a/security/src/test/resources/hbase-site.xml
+++ b/security/src/test/resources/hbase-site.xml
@@ -2,8 +2,6 @@
+
+ ${basedir}/NOTICE.txt
+ ${basedir}/LICENSE.txt
+
@@ -108,4 +113,26 @@
0644
+
+
+
+
+ .
+ LICENSE.txt
+ unix
+
+
+
+ .
+ NOTICE.txt
+ unix
+
+
+
+ .
+ LEGAL
+ unix
+
+
+
diff --git a/src/assembly/resources/supplemental-models.xml b/src/assembly/resources/supplemental-models.xml
new file mode 100644
index 000000000000..6b727f9c836c
--- /dev/null
+++ b/src/assembly/resources/supplemental-models.xml
@@ -0,0 +1,1664 @@
+
+
+
+
+
+
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ The Apache Software Foundation
+ http://www.apache.org/
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ commons-beanutils
+ commons-beanutils
+
+
+ The Apache Software Foundation
+ http://www.apache.org/
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-core
+
+
+ The Apache Software Foundation
+ http://www.apache.org/
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.xerial.snappy
+ snappy-java
+
+
+ The Apache Software Foundation
+ http://www.apache.org/
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+
+ com.github.stephenc.findbugs
+ findbugs-annotations
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ com.github.stephenc.high-scale-lib
+ high-scale-lib
+ Highly Scalable Java
+
+
+
+ Public Domain
+ repo
+ http://creativecommons.org/licenses/publicdomain/
+
+The person or persons who have associated work with this document (the
+"Dedicator" or "Certifier") hereby either (a) certifies that, to the best
+of his knowledge, the work of authorship identified is in the public
+domain of the country from which the work is published, or (b) hereby
+dedicates whatever copyright the dedicators holds in the work of
+authorship identified below (the "Work") to the public domain. A
+certifier, moreover, dedicates any copyright interest he may have in the
+associated work, and for these purposes, is described as a "dedicator"
+below.
+
+A certifier has taken reasonable steps to verify the copyright status of
+this work. Certifier recognizes that his good faith efforts may not
+shield him from liability if in fact the work certified is not in the
+public domain.
+
+Dedicator makes this dedication for the benefit of the public at large and
+to the detriment of the Dedicator's heirs and successors. Dedicator
+intends this dedication to be an overt act of relinquishment in perpetuity
+of all present and future rights under copyright law, whether vested or
+contingent, in the Work. Dedicator understands that such relinquishment of
+all rights includes the relinquishment of all rights to enforce (by
+lawsuit or otherwise) those copyrights in the Work.
+
+Dedicator recognizes that, once placed in the public domain, the Work may
+be freely reproduced, distributed, transmitted, used, modified, built
+upon, or otherwise exploited by anyone for any purpose, commercial or
+non-commercial, and in any way, including by methods that have not yet
+been invented or conceived.
+
+
+
+
+
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.jboss.netty
+ netty
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ io.netty
+ netty
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ commons-httpclient
+ commons-httpclient
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.mortbay.jetty
+ jetty-util
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.cloudera.htrace
+ htrace-core
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+
+ net.java.dev.jets3t
+ jets3t
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.mortbay.jetty
+ jetty
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.mortbay.jetty
+ jetty-sslengine
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.mortbay.jetty
+ jsp-api-2.1
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ com.yammer.metrics
+ metrics-core
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+ org.codehaus.jettison
+ jettison
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+Copyright 2006 Envoi Solutions LLC
+
+
+
+
+
+
+
+
+ com.google.protobuf
+ protobuf-java
+ Protocol Buffer Java API
+
+
+
+
+ New BSD license
+ http://www.opensource.org/licenses/bsd-license.php
+ repo
+
+Copyright 2008, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Code generated by the Protocol Buffer compiler is owned by the owner
+of the input file used when generating it. This code is not
+standalone and requires a support library to be linked with it. This
+support library is itself covered by the above license.
+
+
+
+
+
+
+
+ com.jcraft
+ jsch
+ JSch
+
+
+
+
+ BSD license
+ http://www.jcraft.com/jsch/LICENSE.txt
+
+Copyright (c) 2002-2015 Atsuhiko Yamanaka, JCraft,Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the distribution.
+
+ 3. The names of the authors may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JCRAFT,
+INC. OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+
+
+
+
+
+ com.thoughtworks.paranamer
+ paranamer
+ ParaNamer Core
+
+
+
+ BSD 3-Clause License
+ https://github.com/codehaus/paranamer-git/blob/paranamer-2.3/LICENSE.txt
+ repo
+
+ Copyright (c) 2006 Paul Hammant & ThoughtWorks Inc
+
+
+
+
+
+
+
+ org.jruby.jcodings
+ jcodings
+ JCodings
+
+
+
+ MIT License
+ http://www.opensource.org/licenses/mit-license.php
+ repo
+
+Copyright (c) 2008-2012 The JCodings Authors
+
+
+
+
+
+
+
+ org.jruby.joni
+ joni
+ Joni
+
+
+
+ MIT License
+ http://www.opensource.org/licenses/mit-license.php
+ repo
+
+Copyright (c) 2008-2014 The Joni Authors
+
+
+
+
+
+
+
+ org.slf4j
+ slf4j-api
+ SLF4J API Module
+
+
+
+ MIT License
+ http://www.opensource.org/licenses/mit-license.php
+ repo
+
+Copyright (c) 2004-2013 QOS.ch
+
+
+
+
+
+
+
+ org.slf4j
+ slf4j-log4j12
+ SLF4J LOG4J-12 Binding
+
+
+
+ MIT License
+ http://www.opensource.org/licenses/mit-license.php
+ repo
+
+Copyright (c) 2004-2008 QOS.ch
+
+
+
+
+
+
+
+ xmlenc
+ xmlenc
+ xmlenc Library
+
+
+
+ BSD 3-Clause License
+ http://www.opensource.org/licenses/bsd-license.php
+ repo
+
+Copyright 2003-2005, Ernst de Haan <wfe.dehaan@gmail.com>
+
+
+
+
+
+
+
+ org.tukaani
+ xz
+
+
+
+ Public Domain
+ repo
+
+Licensing of XZ for Java
+========================
+
+ All the files in this package have been written by Lasse Collin
+ and/or Igor Pavlov. All these files have been put into the
+ public domain. You can do whatever you want with these files.
+
+ This software is provided "as is", without any warranty.
+
+
+
+
+
+
+
+
+ aopalliance
+ aopalliance
+ AOP alliance
+
+
+
+ Public Domain
+ repo
+
+LICENCE: all the source code provided by AOP Alliance is Public Domain.
+
+
+
+
+
+
+
+ asm
+ asm
+ ASM: a very small and fast Java bytecode manipulation framework
+
+
+
+ BSD 3-Clause License
+ http://cvs.forge.objectweb.org/cgi-bin/viewcvs.cgi/*checkout*/asm/asm/LICENSE.txt?rev=1.3&only_with_tag=ASM_3_1_MVN
+ repo
+
+Copyright (c) 2000-2005 INRIA, France Telecom
+
+
+
+
+
+
+
+ org.fusesource.leveldbjni
+ leveldbjni-all
+
+
+
+
+ BSD 3-Clause License
+ http://www.opensource.org/licenses/BSD-3-Clause
+ repo
+
+Copyright (c) 2011 FuseSource Corp. All rights reserved.
+
+
+
+
+
+
+
+
+ org.hamcrest
+ hamcrest-core
+
+
+
+ New BSD license
+ http://www.opensource.org/licenses/bsd-license.php
+ repo
+
+Copyright (c) 2000-2006, www.hamcrest.org
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the distribution.
+
+Neither the name of Hamcrest nor the names of its contributors may be used to endorse
+or promote products derived from this software without specific prior written
+permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+
+
+
+
+
+
+
+
+
+ javax.activation
+ activation
+ JavaBeans Activation Framework (JAF)
+ http://java.sun.com/products/javabeans/jaf/index.jsp
+
+
+
+ Common Development and Distribution License (CDDL) v1.0
+ https://glassfish.dev.java.net/public/CDDLv1.0.html
+ repo
+
+
+
+
+
+
+
+
+ javax.xml.bind
+ jaxb-api
+ JAXB API bundle for GlassFish V3
+
+
+
+ CDDL 1.1
+ https://glassfish.java.net/public/CDDL+GPL_1_1.html
+ repo
+
+Copyright (c) 2010 Oracle and/or its affiliates.
+
+
+
+
+
+
+
+ junit
+ junit
+ JUnit
+ http://junit.org/
+
+
+
+ Common Public License Version 1.0
+ http://www.opensource.org/licenses/cpl1.0.txt
+ repo
+
+
+
+
+
+
+
+
+
+ com.sun.jersey
+ jersey-client
+
+ https://java.net/projects/jersey/
+
+
+
+ CDDL 1.1
+ https://glassfish.java.net/public/CDDL+GPL_1_1.html
+ repo
+
+Copyright (c) 2010-2011 Oracle and/or its affiliates.
+
+
+
+
+
+
+
+ com.sun.jersey
+ jersey-core
+ https://java.net/projects/jersey/
+
+
+
+ CDDL 1.1
+ https://glassfish.java.net/public/CDDL+GPL_1_1.html
+ repo
+
+Copyright (c) 2010-2011 Oracle and/or its affiliates.
+
+
+
+
+
+
+
+ com.sun.jersey
+ jersey-json
+ https://java.net/projects/jersey/
+
+
+
+ CDDL 1.1
+ https://glassfish.java.net/public/CDDL+GPL_1_1.html
+ repo
+
+Copyright (c) 2010-2011 Oracle and/or its affiliates.
+
+
+
+
+
+
+
+ com.sun.jersey
+ jersey-server
+ https://java.net/projects/jersey/
+
+
+
+ CDDL 1.1
+ https://glassfish.java.net/public/CDDL+GPL_1_1.html
+ repo
+
+Copyright (c) 2010-2011 Oracle and/or its affiliates.
+
+
+
+
+
+
+
+ com.sun.jersey.contribs
+ jersey-guice
+ https://java.net/projects/jersey/
+
+
+
+ CDDL 1.1
+ https://glassfish.java.net/public/CDDL+GPL_1_1.html
+ repo
+
+Copyright (c) 2010-2011 Oracle and/or its affiliates.
+
+
+
+
+
+
+
+ com.sun.xml.bind
+ jaxb-impl
+ JAXB Reference Implementation for GlassFish
+ https://jaxb.java.net/
+
+
+
+ CDDL 1.1
+ https://glassfish.java.net/public/CDDL+GPL_1_1.html
+ repo
+
+Copyright (c) 2010 Oracle and/or its affiliates.
+
+
+
+
+
+
+
+ javax.xml.bind
+ jaxb-api
+ JAXB API bundle for GlassFish V3
+ https://jaxb.java.net/
+
+
+
+ CDDL 1.1
+ https://glassfish.java.net/public/CDDL+GPL_1_1.html
+ repo
+
+Copyright (c) 2010 Oracle and/or its affiliates.
+
+
+
+
+
+
+
+
+ javax.servlet
+ servlet-api
+ Java Servlet API v2.5
+ http://search.maven.org/#artifactdetails%7Cjavax.servlet%7Cservlet-api%7C2.5%7Cjar
+
+
+
+ Common Development and Distribution License (CDDL) v1.0
+ https://glassfish.dev.java.net/public/CDDLv1.0.html
+ repo
+
+Copyright 1999-2005 Sun Microsystems, Inc.
+Portions copyright 2002 International Business Machines Corporation
+Portions copyright Apache Software Foundation
+
+
+
+
+
+
+
+ org.mortbay.jetty
+ servlet-api-2.5
+ Servlet Specification 2.5 API
+ http://www.eclipse.org/jetty/
+
+
+
+ Common Development and Distribution License (CDDL) v1.0
+ https://glassfish.dev.java.net/public/CDDLv1.0.html
+ repo
+
+Copyright 1999-2005 Sun Microsystems, Inc.
+Portions copyright 2002 International Business Machines Corporation
+Portions copyright Apache Software Foundation
+
+
+
+
+
+
+
+ org.mortbay.jetty
+ jsp-2.1
+ JSP2.1 Jasper implementation from Glassfish
+ http://www.eclipse.org/jetty/
+
+
+
+ Common Development and Distribution License (CDDL) v1.0
+ https://glassfish.dev.java.net/public/CDDLv1.0.html
+ repo
+
+
+Copyright 2005 Sun Microsystems, Inc. and portions Copyright Apache Software Foundation.
+
+
+
+
+
+
+
+ org.jamon
+ jamon-runtime
+ Jamon runtime support classes
+ http://www.jamon.org/
+
+
+
+ http://www.mozilla.org/MPL/MPL-1.1.txt
+ Mozilla Public License Version 1.1
+ repo
+
+ MOZILLA PUBLIC LICENSE
+ Version 1.1
+
+ ---------------
+
+1. Definitions.
+
+ 1.0.1. "Commercial Use" means distribution or otherwise making the
+ Covered Code available to a third party.
+
+ 1.1. "Contributor" means each entity that creates or contributes to
+ the creation of Modifications.
+
+ 1.2. "Contributor Version" means the combination of the Original
+ Code, prior Modifications used by a Contributor, and the Modifications
+ made by that particular Contributor.
+
+ 1.3. "Covered Code" means the Original Code or Modifications or the
+ combination of the Original Code and Modifications, in each case
+ including portions thereof.
+
+ 1.4. "Electronic Distribution Mechanism" means a mechanism generally
+ accepted in the software development community for the electronic
+ transfer of data.
+
+ 1.5. "Executable" means Covered Code in any form other than Source
+ Code.
+
+ 1.6. "Initial Developer" means the individual or entity identified
+ as the Initial Developer in the Source Code notice required by Exhibit
+ A.
+
+ 1.7. "Larger Work" means a work which combines Covered Code or
+ portions thereof with code not governed by the terms of this License.
+
+ 1.8. "License" means this document.
+
+ 1.8.1. "Licensable" means having the right to grant, to the maximum
+ extent possible, whether at the time of the initial grant or
+ subsequently acquired, any and all of the rights conveyed herein.
+
+ 1.9. "Modifications" means any addition to or deletion from the
+ substance or structure of either the Original Code or any previous
+ Modifications. When Covered Code is released as a series of files, a
+ Modification is:
+ A. Any addition to or deletion from the contents of a file
+ containing Original Code or previous Modifications.
+
+ B. Any new file that contains any part of the Original Code or
+ previous Modifications.
+
+ 1.10. "Original Code" means Source Code of computer software code
+ which is described in the Source Code notice required by Exhibit A as
+ Original Code, and which, at the time of its release under this
+ License is not already Covered Code governed by this License.
+
+ 1.10.1. "Patent Claims" means any patent claim(s), now owned or
+ hereafter acquired, including without limitation, method, process,
+ and apparatus claims, in any patent Licensable by grantor.
+
+ 1.11. "Source Code" means the preferred form of the Covered Code for
+ making modifications to it, including all modules it contains, plus
+ any associated interface definition files, scripts used to control
+ compilation and installation of an Executable, or source code
+ differential comparisons against either the Original Code or another
+ well known, available Covered Code of the Contributor's choice. The
+ Source Code can be in a compressed or archival form, provided the
+ appropriate decompression or de-archiving software is widely available
+ for no charge.
+
+ 1.12. "You" (or "Your") means an individual or a legal entity
+ exercising rights under, and complying with all of the terms of, this
+ License or a future version of this License issued under Section 6.1.
+ For legal entities, "You" includes any entity which controls, is
+ controlled by, or is under common control with You. For purposes of
+ this definition, "control" means (a) the power, direct or indirect,
+ to cause the direction or management of such entity, whether by
+ contract or otherwise, or (b) ownership of more than fifty percent
+ (50%) of the outstanding shares or beneficial ownership of such
+ entity.
+
+2. Source Code License.
+
+ 2.1. The Initial Developer Grant.
+ The Initial Developer hereby grants You a world-wide, royalty-free,
+ non-exclusive license, subject to third party intellectual property
+ claims:
+ (a) under intellectual property rights (other than patent or
+ trademark) Licensable by Initial Developer to use, reproduce,
+ modify, display, perform, sublicense and distribute the Original
+ Code (or portions thereof) with or without Modifications, and/or
+ as part of a Larger Work; and
+
+ (b) under Patents Claims infringed by the making, using or
+ selling of Original Code, to make, have made, use, practice,
+ sell, and offer for sale, and/or otherwise dispose of the
+ Original Code (or portions thereof).
+
+ (c) the licenses granted in this Section 2.1(a) and (b) are
+ effective on the date Initial Developer first distributes
+ Original Code under the terms of this License.
+
+ (d) Notwithstanding Section 2.1(b) above, no patent license is
+ granted: 1) for code that You delete from the Original Code; 2)
+ separate from the Original Code; or 3) for infringements caused
+ by: i) the modification of the Original Code or ii) the
+ combination of the Original Code with other software or devices.
+
+ 2.2. Contributor Grant.
+ Subject to third party intellectual property claims, each Contributor
+ hereby grants You a world-wide, royalty-free, non-exclusive license
+
+ (a) under intellectual property rights (other than patent or
+ trademark) Licensable by Contributor, to use, reproduce, modify,
+ display, perform, sublicense and distribute the Modifications
+ created by such Contributor (or portions thereof) either on an
+ unmodified basis, with other Modifications, as Covered Code
+ and/or as part of a Larger Work; and
+
+ (b) under Patent Claims infringed by the making, using, or
+ selling of Modifications made by that Contributor either alone
+ and/or in combination with its Contributor Version (or portions
+ of such combination), to make, use, sell, offer for sale, have
+ made, and/or otherwise dispose of: 1) Modifications made by that
+ Contributor (or portions thereof); and 2) the combination of
+ Modifications made by that Contributor with its Contributor
+ Version (or portions of such combination).
+
+ (c) the licenses granted in Sections 2.2(a) and 2.2(b) are
+ effective on the date Contributor first makes Commercial Use of
+ the Covered Code.
+
+ (d) Notwithstanding Section 2.2(b) above, no patent license is
+ granted: 1) for any code that Contributor has deleted from the
+ Contributor Version; 2) separate from the Contributor Version;
+ 3) for infringements caused by: i) third party modifications of
+ Contributor Version or ii) the combination of Modifications made
+ by that Contributor with other software (except as part of the
+ Contributor Version) or other devices; or 4) under Patent Claims
+ infringed by Covered Code in the absence of Modifications made by
+ that Contributor.
+
+3. Distribution Obligations.
+
+ 3.1. Application of License.
+ The Modifications which You create or to which You contribute are
+ governed by the terms of this License, including without limitation
+ Section 2.2. The Source Code version of Covered Code may be
+ distributed only under the terms of this License or a future version
+ of this License released under Section 6.1, and You must include a
+ copy of this License with every copy of the Source Code You
+ distribute. You may not offer or impose any terms on any Source Code
+ version that alters or restricts the applicable version of this
+ License or the recipients' rights hereunder. However, You may include
+ an additional document offering the additional rights described in
+ Section 3.5.
+
+ 3.2. Availability of Source Code.
+ Any Modification which You create or to which You contribute must be
+ made available in Source Code form under the terms of this License
+ either on the same media as an Executable version or via an accepted
+ Electronic Distribution Mechanism to anyone to whom you made an
+ Executable version available; and if made available via Electronic
+ Distribution Mechanism, must remain available for at least twelve (12)
+ months after the date it initially became available, or at least six
+ (6) months after a subsequent version of that particular Modification
+ has been made available to such recipients. You are responsible for
+ ensuring that the Source Code version remains available even if the
+ Electronic Distribution Mechanism is maintained by a third party.
+
+ 3.3. Description of Modifications.
+ You must cause all Covered Code to which You contribute to contain a
+ file documenting the changes You made to create that Covered Code and
+ the date of any change. You must include a prominent statement that
+ the Modification is derived, directly or indirectly, from Original
+ Code provided by the Initial Developer and including the name of the
+ Initial Developer in (a) the Source Code, and (b) in any notice in an
+ Executable version or related documentation in which You describe the
+ origin or ownership of the Covered Code.
+
+ 3.4. Intellectual Property Matters
+ (a) Third Party Claims.
+ If Contributor has knowledge that a license under a third party's
+ intellectual property rights is required to exercise the rights
+ granted by such Contributor under Sections 2.1 or 2.2,
+ Contributor must include a text file with the Source Code
+ distribution titled "LEGAL" which describes the claim and the
+ party making the claim in sufficient detail that a recipient will
+ know whom to contact. If Contributor obtains such knowledge after
+ the Modification is made available as described in Section 3.2,
+ Contributor shall promptly modify the LEGAL file in all copies
+ Contributor makes available thereafter and shall take other steps
+ (such as notifying appropriate mailing lists or newsgroups)
+ reasonably calculated to inform those who received the Covered
+ Code that new knowledge has been obtained.
+
+ (b) Contributor APIs.
+ If Contributor's Modifications include an application programming
+ interface and Contributor has knowledge of patent licenses which
+ are reasonably necessary to implement that API, Contributor must
+ also include this information in the LEGAL file.
+
+ (c) Representations.
+ Contributor represents that, except as disclosed pursuant to
+ Section 3.4(a) above, Contributor believes that Contributor's
+ Modifications are Contributor's original creation(s) and/or
+ Contributor has sufficient rights to grant the rights conveyed by
+ this License.
+
+ 3.5. Required Notices.
+ You must duplicate the notice in Exhibit A in each file of the Source
+ Code. If it is not possible to put such notice in a particular Source
+ Code file due to its structure, then You must include such notice in a
+ location (such as a relevant directory) where a user would be likely
+ to look for such a notice. If You created one or more Modification(s)
+ You may add your name as a Contributor to the notice described in
+ Exhibit A. You must also duplicate this License in any documentation
+ for the Source Code where You describe recipients' rights or ownership
+ rights relating to Covered Code. You may choose to offer, and to
+ charge a fee for, warranty, support, indemnity or liability
+ obligations to one or more recipients of Covered Code. However, You
+ may do so only on Your own behalf, and not on behalf of the Initial
+ Developer or any Contributor. You must make it absolutely clear than
+ any such warranty, support, indemnity or liability obligation is
+ offered by You alone, and You hereby agree to indemnify the Initial
+ Developer and every Contributor for any liability incurred by the
+ Initial Developer or such Contributor as a result of warranty,
+ support, indemnity or liability terms You offer.
+
+ 3.6. Distribution of Executable Versions.
+ You may distribute Covered Code in Executable form only if the
+ requirements of Section 3.1-3.5 have been met for that Covered Code,
+ and if You include a notice stating that the Source Code version of
+ the Covered Code is available under the terms of this License,
+ including a description of how and where You have fulfilled the
+ obligations of Section 3.2. The notice must be conspicuously included
+ in any notice in an Executable version, related documentation or
+ collateral in which You describe recipients' rights relating to the
+ Covered Code. You may distribute the Executable version of Covered
+ Code or ownership rights under a license of Your choice, which may
+ contain terms different from this License, provided that You are in
+ compliance with the terms of this License and that the license for the
+ Executable version does not attempt to limit or alter the recipient's
+ rights in the Source Code version from the rights set forth in this
+ License. If You distribute the Executable version under a different
+ license You must make it absolutely clear that any terms which differ
+ from this License are offered by You alone, not by the Initial
+ Developer or any Contributor. You hereby agree to indemnify the
+ Initial Developer and every Contributor for any liability incurred by
+ the Initial Developer or such Contributor as a result of any such
+ terms You offer.
+
+ 3.7. Larger Works.
+ You may create a Larger Work by combining Covered Code with other code
+ not governed by the terms of this License and distribute the Larger
+ Work as a single product. In such a case, You must make sure the
+ requirements of this License are fulfilled for the Covered Code.
+
+4. Inability to Comply Due to Statute or Regulation.
+
+ If it is impossible for You to comply with any of the terms of this
+ License with respect to some or all of the Covered Code due to
+ statute, judicial order, or regulation then You must: (a) comply with
+ the terms of this License to the maximum extent possible; and (b)
+ describe the limitations and the code they affect. Such description
+ must be included in the LEGAL file described in Section 3.4 and must
+ be included with all distributions of the Source Code. Except to the
+ extent prohibited by statute or regulation, such description must be
+ sufficiently detailed for a recipient of ordinary skill to be able to
+ understand it.
+
+5. Application of this License.
+
+ This License applies to code to which the Initial Developer has
+ attached the notice in Exhibit A and to related Covered Code.
+
+6. Versions of the License.
+
+ 6.1. New Versions.
+ Netscape Communications Corporation ("Netscape") may publish revised
+ and/or new versions of the License from time to time. Each version
+ will be given a distinguishing version number.
+
+ 6.2. Effect of New Versions.
+ Once Covered Code has been published under a particular version of the
+ License, You may always continue to use it under the terms of that
+ version. You may also choose to use such Covered Code under the terms
+ of any subsequent version of the License published by Netscape. No one
+ other than Netscape has the right to modify the terms applicable to
+ Covered Code created under this License.
+
+ 6.3. Derivative Works.
+ If You create or use a modified version of this License (which you may
+ only do in order to apply it to code which is not already Covered Code
+ governed by this License), You must (a) rename Your license so that
+ the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape",
+ "MPL", "NPL" or any confusingly similar phrase do not appear in your
+ license (except to note that your license differs from this License)
+ and (b) otherwise make it clear that Your version of the license
+ contains terms which differ from the Mozilla Public License and
+ Netscape Public License. (Filling in the name of the Initial
+ Developer, Original Code or Contributor in the notice described in
+ Exhibit A shall not of themselves be deemed to be modifications of
+ this License.)
+
+7. DISCLAIMER OF WARRANTY.
+
+ COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS,
+ WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF
+ DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING.
+ THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE
+ IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT,
+ YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE
+ COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER
+ OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF
+ ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER.
+
+8. TERMINATION.
+
+ 8.1. This License and the rights granted hereunder will terminate
+ automatically if You fail to comply with terms herein and fail to cure
+ such breach within 30 days of becoming aware of the breach. All
+ sublicenses to the Covered Code which are properly granted shall
+ survive any termination of this License. Provisions which, by their
+ nature, must remain in effect beyond the termination of this License
+ shall survive.
+
+ 8.2. If You initiate litigation by asserting a patent infringement
+ claim (excluding declatory judgment actions) against Initial Developer
+ or a Contributor (the Initial Developer or Contributor against whom
+ You file such action is referred to as "Participant") alleging that:
+
+ (a) such Participant's Contributor Version directly or indirectly
+ infringes any patent, then any and all rights granted by such
+ Participant to You under Sections 2.1 and/or 2.2 of this License
+ shall, upon 60 days notice from Participant terminate prospectively,
+ unless if within 60 days after receipt of notice You either: (i)
+ agree in writing to pay Participant a mutually agreeable reasonable
+ royalty for Your past and future use of Modifications made by such
+ Participant, or (ii) withdraw Your litigation claim with respect to
+ the Contributor Version against such Participant. If within 60 days
+ of notice, a reasonable royalty and payment arrangement are not
+ mutually agreed upon in writing by the parties or the litigation claim
+ is not withdrawn, the rights granted by Participant to You under
+ Sections 2.1 and/or 2.2 automatically terminate at the expiration of
+ the 60 day notice period specified above.
+
+ (b) any software, hardware, or device, other than such Participant's
+ Contributor Version, directly or indirectly infringes any patent, then
+ any rights granted to You by such Participant under Sections 2.1(b)
+ and 2.2(b) are revoked effective as of the date You first made, used,
+ sold, distributed, or had made, Modifications made by that
+ Participant.
+
+ 8.3. If You assert a patent infringement claim against Participant
+ alleging that such Participant's Contributor Version directly or
+ indirectly infringes any patent where such claim is resolved (such as
+ by license or settlement) prior to the initiation of patent
+ infringement litigation, then the reasonable value of the licenses
+ granted by such Participant under Sections 2.1 or 2.2 shall be taken
+ into account in determining the amount or value of any payment or
+ license.
+
+ 8.4. In the event of termination under Sections 8.1 or 8.2 above,
+ all end user license agreements (excluding distributors and resellers)
+ which have been validly granted by You or any distributor hereunder
+ prior to termination shall survive termination.
+
+9. LIMITATION OF LIABILITY.
+
+ UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
+ (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL
+ DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE,
+ OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR
+ ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY
+ CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL,
+ WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER
+ COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN
+ INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
+ LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY
+ RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW
+ PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE
+ EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO
+ THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU.
+
+10. U.S. GOVERNMENT END USERS.
+
+ The Covered Code is a "commercial item," as that term is defined in
+ 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer
+ software" and "commercial computer software documentation," as such
+ terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48
+ C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995),
+ all U.S. Government End Users acquire Covered Code with only those
+ rights set forth herein.
+
+11. MISCELLANEOUS.
+
+ This License represents the complete agreement concerning subject
+ matter hereof. If any provision of this License is held to be
+ unenforceable, such provision shall be reformed only to the extent
+ necessary to make it enforceable. This License shall be governed by
+ California law provisions (except to the extent applicable law, if
+ any, provides otherwise), excluding its conflict-of-law provisions.
+ With respect to disputes in which at least one party is a citizen of,
+ or an entity chartered or registered to do business in the United
+ States of America, any litigation relating to this License shall be
+ subject to the jurisdiction of the Federal Courts of the Northern
+ District of California, with venue lying in Santa Clara County,
+ California, with the losing party responsible for costs, including
+ without limitation, court costs and reasonable attorneys' fees and
+ expenses. The application of the United Nations Convention on
+ Contracts for the International Sale of Goods is expressly excluded.
+ Any law or regulation which provides that the language of a contract
+ shall be construed against the drafter shall not apply to this
+ License.
+
+12. RESPONSIBILITY FOR CLAIMS.
+
+ As between Initial Developer and the Contributors, each party is
+ responsible for claims and damages arising, directly or indirectly,
+ out of its utilization of rights under this License and You agree to
+ work with Initial Developer and Contributors to distribute such
+ responsibility on an equitable basis. Nothing herein is intended or
+ shall be deemed to constitute any admission of liability.
+
+13. MULTIPLE-LICENSED CODE.
+
+ Initial Developer may designate portions of the Covered Code as
+ "Multiple-Licensed". "Multiple-Licensed" means that the Initial
+ Developer permits you to utilize portions of the Covered Code under
+ Your choice of the MPL or the alternative licenses, if any, specified
+ by the Initial Developer in the file described in Exhibit A.
+
+EXHIBIT A -Mozilla Public License.
+
+ ``The contents of this file are subject to the Mozilla Public License
+ Version 1.1 (the "License"); you may not use this file except in
+ compliance with the License. You may obtain a copy of the License at
+ http://www.mozilla.org/MPL/
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ License for the specific language governing rights and limitations
+ under the License.
+
+ The Original Code is ______________________________________.
+
+ The Initial Developer of the Original Code is ________________________.
+ Portions created by ______________________ are Copyright (C) ______
+ _______________________. All Rights Reserved.
+
+ Contributor(s): ______________________________________.
+
+ Alternatively, the contents of this file may be used under the terms
+ of the _____ license (the "[___] License"), in which case the
+ provisions of [______] License are applicable instead of those
+ above. If you wish to allow use of your version of this file only
+ under the terms of the [____] License and not to allow others to use
+ your version of this file under the MPL, indicate your decision by
+ deleting the provisions above and replace them with the notice and
+ other provisions required by the [___] License. If you do not delete
+ the provisions above, a recipient may use your version of this file
+ under either the MPL or the [___] License."
+
+ [NOTE: The text of this Exhibit A may differ slightly from the text of
+ the notices in the Source Code files of the Original Code. You should
+ use the text of this Exhibit A rather than the text found in the
+ Original Code Source Code for Your Modifications.]
+
+
+
+
+
+
+
+
+
+ org.jruby
+ jruby-complete
+ JRuby Complete
+ http://www.jruby.org/
+
+
+
+ Common Public License Version 1.0
+ http://www-128.ibm.com/developerworks/library/os-cpl.html
+ repo
+
+Copyright (c) 2007-2011 The JRuby project
+
+
+
+
+
+
+
+
+ org.eclipse.jdt
+ core
+ Eclipse JDT Core
+ http://www.eclipse.org/jdt/
+
+
+
+ Eclipse Public License v1.0
+ http://www.eclipse.org/org/documents/epl-v10.php
+ repo
+
+Eclipse Public License - v 1.0
+
+THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
+LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
+CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
+
+1. DEFINITIONS
+
+"Contribution" means:
+
+ a) in the case of the initial Contributor, the initial code and
+documentation distributed under this Agreement, and
+
+ b) in the case of each subsequent Contributor:
+
+ i) changes to the Program, and
+
+ ii) additions to the Program;
+
+ where such changes and/or additions to the Program originate from and are
+distributed by that particular Contributor. A Contribution 'originates' from a
+Contributor if it was added to the Program by such Contributor itself or anyone
+acting on such Contributor's behalf. Contributions do not include additions to
+the Program which: (i) are separate modules of software distributed in
+conjunction with the Program under their own license agreement, and (ii) are
+not derivative works of the Program.
+
+"Contributor" means any person or entity that distributes the Program.
+
+"Licensed Patents" mean patent claims licensable by a Contributor which are
+necessarily infringed by the use or sale of its Contribution alone or when
+combined with the Program.
+
+"Program" means the Contributions distributed in accordance with this
+Agreement.
+
+"Recipient" means anyone who receives the Program under this Agreement,
+including all Contributors.
+
+2. GRANT OF RIGHTS
+
+a) Subject to the terms of this Agreement, each Contributor hereby grants
+Recipient a non-exclusive, worldwide, royalty-free copyright license to
+reproduce, prepare derivative works of, publicly display, publicly perform,
+distribute and sublicense the Contribution of such Contributor, if any, and
+such derivative works, in source code and object code form.
+
+b) Subject to the terms of this Agreement, each Contributor hereby grants
+Recipient a non-exclusive, worldwide, royalty-free patent license under
+Licensed Patents to make, use, sell, offer to sell, import and otherwise
+transfer the Contribution of such Contributor, if any, in source code and
+object code form. This patent license shall apply to the combination of the
+Contribution and the Program if, at the time the Contribution is added by the
+Contributor, such addition of the Contribution causes such combination to be
+covered by the Licensed Patents. The patent license shall not apply to any
+other combinations which include the Contribution. No hardware per se is
+licensed hereunder.
+
+c) Recipient understands that although each Contributor grants the licenses to
+its Contributions set forth herein, no assurances are provided by any
+Contributor that the Program does not infringe the patent or other intellectual
+property rights of any other entity. Each Contributor disclaims any liability
+to Recipient for claims brought by any other entity based on infringement of
+intellectual property rights or otherwise. As a condition to exercising the
+rights and licenses granted hereunder, each Recipient hereby assumes sole
+responsibility to secure any other intellectual property rights needed, if any.
+For example, if a third party patent license is required to allow Recipient to
+distribute the Program, it is Recipient's responsibility to acquire that
+license before distributing the Program.
+
+d) Each Contributor represents that to its knowledge it has sufficient
+copyright rights in its Contribution, if any, to grant the copyright license
+set forth in this Agreement.
+
+3. REQUIREMENTS
+
+A Contributor may choose to distribute the Program in object code form under
+its own license agreement, provided that:
+
+ a) it complies with the terms and conditions of this Agreement; and
+
+ b) its license agreement:
+
+ i) effectively disclaims on behalf of all Contributors all warranties and
+conditions, express and implied, including warranties or conditions of title
+and non-infringement, and implied warranties or conditions of merchantability
+and fitness for a particular purpose;
+
+ ii) effectively excludes on behalf of all Contributors all liability for
+damages, including direct, indirect, special, incidental and consequential
+damages, such as lost profits;
+
+ iii) states that any provisions which differ from this Agreement are
+offered by that Contributor alone and not by any other party; and
+
+ iv) states that source code for the Program is available from such
+Contributor, and informs licensees how to obtain it in a reasonable manner on
+or through a medium customarily used for software exchange.
+
+When the Program is made available in source code form:
+
+ a) it must be made available under this Agreement; and
+
+ b) a copy of this Agreement must be included with each copy of the Program.
+
+Contributors may not remove or alter any copyright notices contained within the
+Program.
+
+Each Contributor must identify itself as the originator of its Contribution, if
+any, in a manner that reasonably allows subsequent Recipients to identify the
+originator of the Contribution.
+
+4. COMMERCIAL DISTRIBUTION
+
+Commercial distributors of software may accept certain responsibilities with
+respect to end users, business partners and the like. While this license is
+intended to facilitate the commercial use of the Program, the Contributor who
+includes the Program in a commercial product offering should do so in a manner
+which does not create potential liability for other Contributors. Therefore, if
+a Contributor includes the Program in a commercial product offering, such
+Contributor ("Commercial Contributor") hereby agrees to defend and indemnify
+every other Contributor ("Indemnified Contributor") against any losses, damages
+and costs (collectively "Losses") arising from claims, lawsuits and other legal
+actions brought by a third party against the Indemnified Contributor to the
+extent caused by the acts or omissions of such Commercial Contributor in
+connection with its distribution of the Program in a commercial product
+offering. The obligations in this section do not apply to any claims or Losses
+relating to any actual or alleged intellectual property infringement. In order
+to qualify, an Indemnified Contributor must: a) promptly notify the Commercial
+Contributor in writing of such claim, and b) allow the Commercial Contributor
+to control, and cooperate with the Commercial Contributor in, the defense and
+any related settlement negotiations. The Indemnified Contributor may
+participate in any such claim at its own expense.
+
+For example, a Contributor might include the Program in a commercial product
+offering, Product X. That Contributor is then a Commercial Contributor. If that
+Commercial Contributor then makes performance claims, or offers warranties
+related to Product X, those performance claims and warranties are such
+Commercial Contributor's responsibility alone. Under this section, the
+Commercial Contributor would have to defend claims against the other
+Contributors related to those performance claims and warranties, and if a court
+requires any other Contributor to pay any damages as a result, the Commercial
+Contributor must pay those damages.
+
+5. NO WARRANTY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR
+IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE,
+NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each
+Recipient is solely responsible for determining the appropriateness of using
+and distributing the Program and assumes all risks associated with its exercise
+of rights under this Agreement , including but not limited to the risks and
+costs of program errors, compliance with applicable laws, damage to or loss of
+data, programs or equipment, and unavailability or interruption of operations.
+
+6. DISCLAIMER OF LIABILITY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
+CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST
+PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS
+GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+7. GENERAL
+
+If any provision of this Agreement is invalid or unenforceable under applicable
+law, it shall not affect the validity or enforceability of the remainder of the
+terms of this Agreement, and without further action by the parties hereto, such
+provision shall be reformed to the minimum extent necessary to make such
+provision valid and enforceable.
+
+If Recipient institutes patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Program itself
+(excluding combinations of the Program with other software or hardware)
+infringes such Recipient's patent(s), then such Recipient's rights granted
+under Section 2(b) shall terminate as of the date such litigation is filed.
+
+All Recipient's rights under this Agreement shall terminate if it fails to
+comply with any of the material terms or conditions of this Agreement and does
+not cure such failure in a reasonable period of time after becoming aware of
+such noncompliance. If all Recipient's rights under this Agreement terminate,
+Recipient agrees to cease use and distribution of the Program as soon as
+reasonably practicable. However, Recipient's obligations under this Agreement
+and any licenses granted by Recipient relating to the Program shall continue
+and survive.
+
+Everyone is permitted to copy and distribute copies of this Agreement, but in
+order to avoid inconsistency the Agreement is copyrighted and may only be
+modified in the following manner. The Agreement Steward reserves the right to
+publish new versions (including revisions) of this Agreement from time to time.
+No one other than the Agreement Steward has the right to modify this Agreement.
+The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation
+may assign the responsibility to serve as the Agreement Steward to a suitable
+separate entity. Each new version of the Agreement will be given a
+distinguishing version number. The Program (including Contributions) may always
+be distributed subject to the version of the Agreement under which it was
+received. In addition, after a new version of the Agreement is published,
+Contributor may elect to distribute the Program (including its Contributions)
+under the new version. Except as expressly stated in Sections 2(a) and 2(b)
+above, Recipient receives no rights or licenses to the intellectual property of
+any Contributor under this Agreement, whether expressly, by implication,
+estoppel or otherwise. All rights in the Program not expressly granted under
+this Agreement are reserved.
+
+This Agreement is governed by the laws of the State of New York and the
+intellectual property laws of the United States of America. No party to this
+Agreement will bring a legal action under this Agreement more than one year
+after the cause of action arose. Each party waives its rights to a jury trial
+in any resulting litigation.
+
+
+
+
+
+
diff --git a/src/docbkx/book.xml b/src/docbkx/book.xml
index dbc43bf8b04a..05133bad2df8 100644
--- a/src/docbkx/book.xml
+++ b/src/docbkx/book.xml
@@ -1,7 +1,6 @@
HBase and Schema DesignA good general introduction on the strength and weaknesses modelling on
- the various non-rdbms datastores is Ian Varleys' Master thesis,
+ the various non-rdbms datastores is Ian Varley's Master thesis,
No Relation: The Mixed Blessings of Non-Relational Databases.
Recommended. Also, read for how HBase stores data internally.
@@ -575,31 +590,31 @@ htable.put(put);
Tables must be disabled when making ColumnFamily modifications, for example:
-Configuration config = HBaseConfiguration.create();
-HBaseAdmin admin = new HBaseAdmin(conf);
+Configuration config = HBaseConfiguration.create();
+HBaseAdmin admin = new HBaseAdmin(conf);
String table = "myTable";
-admin.disableTable(table);
+admin.disableTable(table);
HColumnDescriptor cf1 = ...;
admin.addColumn(table, cf1); // adding new ColumnFamily
HColumnDescriptor cf2 = ...;
admin.modifyColumn(table, cf2); // modifying existing ColumnFamily
-admin.enableTable(table);
+admin.enableTable(table);
See for more information about configuring client connections.
Note: online schema changes are supported in the 0.92.x codebase, but the 0.90.x codebase requires the table
to be disabled.
- Schema Updates
+ Schema UpdatesWhen changes are made to either Tables or ColumnFamilies (e.g., region size, block size), these changes
take effect the next time there is a major compaction and the StoreFiles get re-written.
See for more information on StoreFiles.
-
+
On the number of column families
@@ -610,7 +625,7 @@ admin.enableTable(table);
if one column family is carrying the bulk of the data bringing on flushes, the adjacent families
will also be flushed even though the amount of data they carry is small. When there are many column families, the
flushing and compaction interaction can make for a bunch of needless i/o loading (To be addressed by
- changing flushing and compaction to work on a per column family basis). For more information
+ changing flushing and compaction to work on a per column family basis). For more information
on compactions, see .
Try to make do with one column family if you can in your schemas. Only introduce a
@@ -618,9 +633,9 @@ admin.enableTable(table);
i.e., you query one column family or the other but usually not both at the same time.
Cardinality of ColumnFamilies
- Where multiple ColumnFamilies exist in a single table, be aware of the cardinality (i.e., number of rows).
- If ColumnFamilyA has 1 million rows and ColumnFamilyB has 1 billion rows, ColumnFamilyA's data will likely be spread
- across many, many regions (and RegionServers). This makes mass scans for ColumnFamilyA less efficient.
+ Where multiple ColumnFamilies exist in a single table, be aware of the cardinality (i.e., number of rows).
+ If ColumnFamilyA has 1 million rows and ColumnFamilyB has 1 billion rows, ColumnFamilyA's data will likely be spread
+ across many, many regions (and RegionServers). This makes mass scans for ColumnFamilyA less efficient.
@@ -632,7 +647,7 @@ admin.enableTable(table);
In the HBase chapter of Tom White's book Hadoop: The Definitive Guide (O'Reilly) there is an optimization note on watching out for a phenomenon where an import process walks in lock-step with all clients in concert pounding one of the table's regions (and thus, a single node), then moving on to the next region, etc. With monotonically increasing row-keys (i.e., using a timestamp), this will happen. See this comic by IKai Lan on why monotonically increasing row keys are problematic in BigTable-like datastores:
monotonically increasing values are bad. The pile-up on a single region brought on
- by monotonically increasing keys can be mitigated by randomizing the input records to not be in sorted order, but in general its best to avoid using a timestamp or a sequence (e.g. 1, 2, 3) as the row-key.
+ by monotonically increasing keys can be mitigated by randomizing the input records to not be in sorted order, but in general it's best to avoid using a timestamp or a sequence (e.g. 1, 2, 3) as the row-key.
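As a concrete illustration of the randomizing idea above, the following is a minimal sketch (not part of the HBase codebase; the class name and bucket count are illustrative) of prefixing a naturally sequential key with a one-byte salt derived from a hash, so that writes spread across a fixed number of key ranges. Note that readers then have to fan a Get/Scan out across all salt buckets.

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.hadoop.hbase.util.Bytes;

public class SaltedKey {
  // Prepend a one-byte salt in [0, buckets) derived from a hash of the natural
  // key, so that monotonically increasing keys are spread across 'buckets'
  // distinct key ranges instead of piling up on a single region.
  public static byte[] salt(byte[] naturalKey, int buckets) throws NoSuchAlgorithmException {
    MessageDigest md = MessageDigest.getInstance("MD5");
    int bucket = (md.digest(naturalKey)[0] & 0xFF) % buckets;
    return Bytes.add(new byte[] { (byte) bucket }, naturalKey);
  }
}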
@@ -670,20 +685,20 @@ admin.enableTable(table);
See for more information on how HBase stores data internally to see why this is important.Column FamiliesTry to keep the ColumnFamily names as small as possible, preferably one character (e.g. "d" for data/default).
-
+
See for more information on how HBase stores data internally to see why this is important.AttributesAlthough verbose attribute names (e.g., "myVeryImportantAttribute") are easier to read, prefer shorter attribute names (e.g., "via")
to store in HBase.
-
+
See for more information on how HBase stores data internally to see why this is important.Rowkey Length
- Keep them as short as is reasonable such that they can still be useful for required data access (e.g., Get vs. Scan).
+ Keep them as short as is reasonable such that they can still be useful for required data access (e.g., Get vs. Scan).
A short key that is useless for data access is not better than a longer key with better get/scan properties. Expect tradeoffs
when designing rowkeys.
-
+ Byte PatternsA long is 8 bytes. You can store an unsigned number up to 18,446,744,073,709,551,615 in those eight bytes.
@@ -696,28 +711,28 @@ admin.enableTable(table);
long l = 1234567890L;
byte[] lb = Bytes.toBytes(l);
System.out.println("long bytes length: " + lb.length); // returns 8
-
+
String s = "" + l;
byte[] sb = Bytes.toBytes(s);
System.out.println("long as string length: " + sb.length); // returns 10
-
-// hash
+
+// hash
//
MessageDigest md = MessageDigest.getInstance("MD5");
byte[] digest = md.digest(Bytes.toBytes(s));
System.out.println("md5 digest bytes length: " + digest.length); // returns 16
-
+
String sDigest = new String(digest);
byte[] sbDigest = Bytes.toBytes(sDigest);
-System.out.println("md5 digest as string length: " + sbDigest.length); // returns 26
-
+System.out.println("md5 digest as string length: " + sbDigest.length); // returns 26
+
-
+
Reverse TimestampsA common problem in database processing is quickly finding the most recent version of a value. A technique using reverse timestamps
- as a part of the key can help greatly with a special case of this problem. Also found in the HBase chapter of Tom White's book Hadoop: The Definitive Guide (O'Reilly),
+ as a part of the key can help greatly with a special case of this problem. Also found in the HBase chapter of Tom White's book Hadoop: The Definitive Guide (O'Reilly),
the technique involves appending (Long.MAX_VALUE - timestamp) to the end of any key, e.g., [key][reverse_timestamp].
The most recent value for [key] in a table can be found by performing a Scan for [key] and obtaining the first record. Since HBase keys
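A minimal sketch of building such a [key][reverse_timestamp] rowkey (the helper class and method names are illustrative, not an HBase API):

import org.apache.hadoop.hbase.util.Bytes;

public class ReverseTimestampKey {
  // Append (Long.MAX_VALUE - timestamp) so that, for a given key prefix,
  // the most recently written cell sorts first and a Scan returns it first.
  public static byte[] build(byte[] key, long timestampMillis) {
    return Bytes.add(key, Bytes.toBytes(Long.MAX_VALUE - timestampMillis));
  }
}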
@@ -734,11 +749,76 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
Immutability of RowkeysRowkeys cannot be changed. The only way they can be "changed" in a table is if the row is deleted and then re-inserted.
- This is a fairly common question on the HBase dist-list so it pays to get the rowkeys right the first time (and/or before you've
+ This is a fairly common question on the HBase dist-list so it pays to get the rowkeys right the first time (and/or before you've
inserted a lot of data).
-
+ Relationship Between RowKeys and Region Splits
+ If you pre-split your table, it is critical to understand how your rowkey will be distributed across
+ the region boundaries. As an example of why this is important, consider the case of using displayable hex characters as the
+ lead position of the key (e.g., "0000000000000000" to "ffffffffffffffff"). Running those key ranges through Bytes.split
+ (which is the split strategy used when creating regions in HBaseAdmin.createTable(byte[] startKey, byte[] endKey, numRegions))
+ for 10 regions will generate the following splits...
+
+
+
+48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 // 0
+54 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 // 6
+61 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -68 // =
+68 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -126 // D
+75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 72 // K
+82 18 18 18 18 18 18 18 18 18 18 18 18 18 18 14 // R
+88 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -44 // X
+95 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -102 // _
+102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 // f
+
+ ... (note: the lead byte is listed to the right as a comment.) Given that the first split is a '0' and the last split is an 'f',
+ everything is great, right? Not so fast.
+
+ The problem is that all the data is going to pile up in the first 2 regions and the last region, thus creating a "lumpy" (and
+ possibly "hot") region problem. To understand why, refer to an ASCII Table.
+ '0' is byte 48, and 'f' is byte 102, but there is a huge gap in byte values (bytes 58 to 96) that will never appear in this
+ keyspace because the only values are [0-9] and [a-f]. Thus, the middle regions will
+ never be used. To make pre-splitting work with this example keyspace, a custom definition of splits (i.e., not relying on the
+ built-in split method) is required.
+
+ Lesson #1: Pre-splitting tables is generally a best practice, but you need to pre-split them in such a way that all the
+ regions are accessible in the keyspace. While this example demonstrated the problem with a hex-key keyspace, the same problem can happen
+ with any keyspace. Know your data.
+
+ Lesson #2: While generally not advisable, using hex-keys (and more generally, displayable data) can still work with pre-split
+ tables as long as all the created regions are accessible in the keyspace.
+
+ To conclude this example, the following shows how appropriate splits can be pre-created for hex-keys:
+
+public static boolean createTable(HBaseAdmin admin, HTableDescriptor table, byte[][] splits)
+throws IOException {
+ try {
+ admin.createTable( table, splits );
+ return true;
+ } catch (TableExistsException e) {
+ logger.info("table " + table.getNameAsString() + " already exists");
+ // the table already exists...
+ return false;
+ }
+}
+
+public static byte[][] getHexSplits(String startKey, String endKey, int numRegions) {
+ byte[][] splits = new byte[numRegions-1][];
+ BigInteger lowestKey = new BigInteger(startKey, 16);
+ BigInteger highestKey = new BigInteger(endKey, 16);
+ BigInteger range = highestKey.subtract(lowestKey);
+ BigInteger regionIncrement = range.divide(BigInteger.valueOf(numRegions));
+ lowestKey = lowestKey.add(regionIncrement);
+ for(int i=0; i < numRegions-1;i++) {
+ BigInteger key = lowestKey.add(regionIncrement.multiply(BigInteger.valueOf(i)));
+ byte[] b = String.format("%016x", key).getBytes();
+ splits[i] = b;
+ }
+ return splits;
+}
+
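As a usage sketch of the two methods above (the table and column family names here are illustrative), pre-creating a 10-region table over the hex keyspace might look like the following:

Configuration conf = HBaseConfiguration.create();
HBaseAdmin admin = new HBaseAdmin(conf);
HTableDescriptor desc = new HTableDescriptor("myHexKeyedTable");
desc.addFamily(new HColumnDescriptor("d"));
// Split points are computed over the full hex keyspace, then handed to createTable.
byte[][] splits = getHexSplits("0000000000000000", "ffffffffffffffff", 10);
createTable(admin, desc, splits);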
+
Number of Versions
@@ -752,8 +832,8 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
stores different values per row by time (and qualifier). Excess versions are removed during major
compactions. The number of max versions may need to be increased or decreased depending on application needs.
- It is not recommended setting the number of max versions to an exceedingly high level (e.g., hundreds or more) unless those old values are
- very dear to you because this will greatly increase StoreFile size.
+ It is not recommended to set the number of max versions to an exceedingly high level (e.g., hundreds or more) unless those old values are
+ very dear to you, because this will greatly increase StoreFile size.
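For example, the maximum number of versions is configured per ColumnFamily; a minimal sketch (table and family names are illustrative) using the admin API shown earlier:

Configuration conf = HBaseConfiguration.create();
HBaseAdmin admin = new HBaseAdmin(conf);
HColumnDescriptor cf = new HColumnDescriptor("d");
cf.setMaxVersions(5);               // retain up to 5 versions per cell
admin.disableTable("myTable");      // required on 0.90.x; see the schema update note above
admin.modifyColumn("myTable", cf);  // apply the modified ColumnFamily definition
admin.enableTable("myTable");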
@@ -778,24 +858,24 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
HBase supports a "bytes-in/bytes-out" interface via Put and
Result, so anything that can be
- converted to an array of bytes can be stored as a value. Input could be strings, numbers, complex objects, or even images as long as they can rendered as bytes.
+ converted to an array of bytes can be stored as a value. Input could be strings, numbers, complex objects, or even images as long as they can be rendered as bytes.
There are practical limits to the size of values (e.g., storing 10-50MB objects in HBase would probably be too much to ask);
- search the mailling list for conversations on this topic. All rows in HBase conform to the datamodel, and
- that includes versioning. Take that into consideration when making your design, as well as block size for the ColumnFamily.
+ search the mailing list for conversations on this topic. All rows in HBase conform to the datamodel, and
+ that includes versioning. Take that into consideration when making your design, as well as block size for the ColumnFamily.
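To illustrate the bytes-in/bytes-out point, a short sketch (the row, family, and qualifier names are illustrative) storing both a number and a string through the same interface:

Configuration conf = HBaseConfiguration.create();
HTable htable = new HTable(conf, "myTable");
Put put = new Put(Bytes.toBytes("rowkey-1"));
// Any value that can be rendered as a byte[] goes through the same Put interface.
put.add(Bytes.toBytes("d"), Bytes.toBytes("count"), Bytes.toBytes(1234567890L));
put.add(Bytes.toBytes("d"), Bytes.toBytes("name"), Bytes.toBytes("some string value"));
htable.put(put);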
Counters
- One supported datatype that deserves special mention are "counters" (i.e., the ability to do atomic increments of numbers). See
+ One supported datatype that deserves special mention is "counters" (i.e., the ability to do atomic increments of numbers). See
Increment in HTable.
Synchronization on counters is done on the RegionServer, not in the client.
-
+ Joins
- If you have multiple tables, don't forget to factor in the potential for into the schema design.
+ If you have multiple tables, don't forget to factor in the potential for joins into the schema design.
@@ -828,22 +908,22 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
Secondary Indexes and Alternate Query Paths
This section could also be titled "what if my table rowkey looks like this but I also want to query my table like that."
- A common example on the dist-list is where a row-key is of the format "user-timestamp" but there are are reporting requirements on activity across users for certain
+ A common example on the dist-list is where a row-key is of the format "user-timestamp" but there are reporting requirements on activity across users for certain
time ranges. Thus, selecting by user is easy because it is in the lead position of the key, but time is not.
There is no single answer on the best way to handle this because it depends on...
- Number of users
+ Number of usersData size and data arrival rate
- Flexibility of reporting requirements (e.g., completely ad-hoc date selection vs. pre-configured ranges)
- Desired execution speed of query (e.g., 90 seconds may be reasonable to some for an ad-hoc report, whereas it may be too long for others)
+ Flexibility of reporting requirements (e.g., completely ad-hoc date selection vs. pre-configured ranges)
+ Desired execution speed of query (e.g., 90 seconds may be reasonable to some for an ad-hoc report, whereas it may be too long for others)
- ... and solutions are also influenced by the size of the cluster and how much processing power you have to throw at the solution.
- Common techniques are in sub-sections below. This is a comprehensive, but not exhaustive, list of approaches.
+ ... and solutions are also influenced by the size of the cluster and how much processing power you have to throw at the solution.
+ Common techniques are in sub-sections below. This is a comprehensive, but not exhaustive, list of approaches.
- It should not be a surprise that secondary indexes require additional cluster space and processing.
+ It should not be a surprise that secondary indexes require additional cluster space and processing.
This is precisely what happens in an RDBMS because the act of creating an alternate index requires both space and processing cycles to update. RDBMS products
- are more advanced in this regard to handle alternative index management out of the box. However, HBase scales better at larger data volumes, so this is a feature trade-off.
+ are more advanced in this regard to handle alternative index management out of the box. However, HBase scales better at larger data volumes, so this is a feature trade-off.
Pay attention to when implementing any of these approaches.Additionally, see the David Butler response in this dist-list thread HBase, mail # user - Stargate+hbase
@@ -860,7 +940,7 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
Periodic-Update Secondary Index
- A secondary index could be created in an other table which is periodically updated via a MapReduce job. The job could be executed intra-day, but depending on
+ A secondary index could be created in another table which is periodically updated via a MapReduce job. The job could be executed intra-day, but depending on
load-strategy it could still potentially be out of sync with the main data table.See for more information.
@@ -868,7 +948,7 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
Dual-Write Secondary Index
- Another strategy is to build the secondary index while publishing data to the cluster (e.g., write to data table, write to index table).
+ Another strategy is to build the secondary index while publishing data to the cluster (e.g., write to data table, write to index table).
If this approach is taken after a data table already exists, then bootstrapping will be needed for the secondary index with a MapReduce job (see ).
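A minimal sketch of the dual-write pattern (the table names, family name, and key layout are illustrative; error handling and batching are omitted):

void dualWrite(Configuration conf, String user, long ts, byte[] event) throws IOException {
  HTable dataTable = new HTable(conf, "events");
  HTable indexTable = new HTable(conf, "events_by_time");
  // Data row is keyed "user-timestamp", matching the main access path.
  byte[] dataKey = Bytes.toBytes(user + "-" + ts);
  Put dataPut = new Put(dataKey);
  dataPut.add(Bytes.toBytes("d"), Bytes.toBytes("event"), event);
  dataTable.put(dataPut);
  // Index row leads with the timestamp so time-range scans across users are cheap.
  Put indexPut = new Put(Bytes.add(Bytes.toBytes(ts), Bytes.toBytes(user)));
  indexPut.add(Bytes.toBytes("d"), Bytes.toBytes("ref"), dataKey);
  indexTable.put(indexPut);
}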
@@ -888,12 +968,12 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
Schema Design Smackdown
- This section will describe common schema design questions that appear on the dist-list. These are
- general guidelines and not laws - each application must consider it's own needs.
+ This section will describe common schema design questions that appear on the dist-list. These are
+ general guidelines and not laws - each application must consider its own needs.
Rows vs. VersionsA common question is whether one should prefer rows or HBase's built-in-versioning. The context is typically where there are
- "a lot" of versions of a row to be retained (e.g., where it is significantly above the HBase default of 3 max versions). The
+ "a lot" of versions of a row to be retained (e.g., where it is significantly above the HBase default of 3 max versions). The
rows-approach would require storing a timestamp in some portion of the rowkey so that successive updates would not overwrite one another.
Preference: Rows (generally speaking).
@@ -901,18 +981,29 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
Rows vs. ColumnsAnother common question is whether one should prefer rows or columns. The context is typically in extreme cases of wide
- tables, such as having 1 row with 1 million attributes, or 1 million rows with 1 columns apiece.
+ tables, such as having 1 row with 1 million attributes, or 1 million rows with 1 column apiece.
- Preference: Rows (generally speaking). To be clear, this guideline is in the context is in extremely wide cases, not in the
- standard use-case where one needs to store a few dozen or hundred columns.
+ Preference: Rows (generally speaking). To be clear, this guideline applies in the context of extremely wide cases, not in the
+ standard use-case where one needs to store a few dozen or hundred columns. But there is also a middle path between these two
+ options, and that is "Rows as Columns."
+ Rows as Columns
+ The middle path between Rows vs. Columns is packing data that would be a separate row into columns, for certain rows.
+ OpenTSDB is the best example of this case where a single row represents a defined time-range, and then discrete events are treated as
+ columns. This approach is more complex, and may require re-writing your data, but has the
+ advantage of being I/O efficient. For an overview of this approach, see
+ Lessons Learned from OpenTSDB
+ from HBaseCon2012.
+
+
+
Operational and Performance Configuration OptionsSee the Performance section for more information on operational and performance
schema design options, such as Bloom Filters, Table-configured regionsizes, compression, and blocksizes.
-
+
ConstraintsHBase currently supports 'constraints' in traditional (SQL) database parlance. The advised usage for Constraints is in enforcing business rules for attributes in the table (e.g., make sure values are in the range 1-10).
@@ -942,9 +1033,9 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
Custom Splitters
- For those interested in implementing custom splitters, see the method getSplits in
+ For those interested in implementing custom splitters, see the method getSplits in
TableInputFormatBase.
- That is where the logic for map-task assignment resides.
+ That is where the logic for map-task assignment resides.
@@ -959,22 +1050,22 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
Configuration config = HBaseConfiguration.create();
Job job = new Job(config, "ExampleRead");
job.setJarByClass(MyReadJob.class); // class that contains mapper
-
+
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
// set other scan attrs
...
-
+
TableMapReduceUtil.initTableMapperJob(
tableName, // input HBase table name
scan, // Scan instance to control CF and attribute selection
MyMapper.class, // mapper
- null, // mapper output key
+ null, // mapper output key
null, // mapper output value
job);
job.setOutputFormatClass(NullOutputFormat.class); // because we aren't emitting anything from mapper
-
+
boolean b = job.waitForCompletion(true);
if (!b) {
throw new IOException("error with job!");
@@ -987,24 +1078,24 @@ public static class MyMapper extends TableMapper<Text, Text> {
public void map(ImmutableBytesWritable row, Result value, Context context) throws InterruptedException, IOException {
// process data for the row from the Result instance.
}
-}
+}
HBase MapReduce Read/Write Example
- The following is an example of using HBase both as a source and as a sink with MapReduce.
+ The following is an example of using HBase both as a source and as a sink with MapReduce.
This example will simply copy data from one table to another.
Configuration config = HBaseConfiguration.create();
Job job = new Job(config,"ExampleReadWrite");
job.setJarByClass(MyReadWriteJob.class); // class that contains mapper
-
+
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
// set other scan attrs
-
+
TableMapReduceUtil.initTableMapperJob(
sourceTable, // input table
scan, // Scan instance to control CF and attribute selection
@@ -1017,17 +1108,17 @@ TableMapReduceUtil.initTableReducerJob(
null, // reducer class
job);
job.setNumReduceTasks(0);
-
+
boolean b = job.waitForCompletion(true);
if (!b) {
throw new IOException("error with job!");
}
- An explanation is required of what TableMapReduceUtil is doing, especially with the reducer.
+ An explanation is required of what TableMapReduceUtil is doing, especially with the reducer.
TableOutputFormat is being used
as the outputFormat class, and several parameters are being set on the config (e.g., TableOutputFormat.OUTPUT_TABLE), as
well as setting the reducer output key to ImmutableBytesWritable and reducer value to Writable.
- These could be set by the programmer on the job and conf, but TableMapReduceUtil tries to make things easier.
+ These could be set by the programmer on the job and conf, but TableMapReduceUtil tries to make things easier.
The following is the example mapper, which will create a Put matching the input Result
and emit it. Note: this is what the CopyTable utility does.
@@ -1038,7 +1129,7 @@ public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put&
// this example is just copying the data from the source table...
context.write(row, resultToPut(row,value));
}
-
+
private static Put resultToPut(ImmutableBytesWritable key, Result result) throws IOException {
Put put = new Put(key.get());
for (KeyValue kv : result.raw()) {
@@ -1049,9 +1140,9 @@ public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put&
}
There isn't actually a reducer step, so TableOutputFormat takes care of sending the Put
- to the target table.
+ to the target table.
- This is just an example, developers could choose not to use TableOutputFormat and connect to the
+ This is just an example, developers could choose not to use TableOutputFormat and connect to the
target table themselves.
@@ -1063,18 +1154,18 @@ public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put&
HBase MapReduce Summary to HBase Example
- The following example uses HBase as a MapReduce source and sink with a summarization step. This example will
+ The following example uses HBase as a MapReduce source and sink with a summarization step. This example will
count the number of distinct instances of a value in a table and write those summarized counts in another table.
Configuration config = HBaseConfiguration.create();
Job job = new Job(config,"ExampleSummary");
job.setJarByClass(MySummaryJob.class); // class that contains mapper and reducer
-
+
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
// set other scan attrs
-
+
TableMapReduceUtil.initTableMapperJob(
sourceTable, // input table
scan, // Scan instance to control CF and attribute selection
@@ -1087,20 +1178,20 @@ TableMapReduceUtil.initTableReducerJob(
MyTableReducer.class, // reducer class
job);
job.setNumReduceTasks(1); // at least one, adjust as required
-
+
boolean b = job.waitForCompletion(true);
if (!b) {
throw new IOException("error with job!");
-}
+}
- In this example mapper a column with a String-value is chosen as the value to summarize upon.
+ In this example mapper a column with a String-value is chosen as the value to summarize upon.
This value is used as the key to emit from the mapper, and an IntWritable represents an instance counter.
public static class MyMapper extends TableMapper<Text, IntWritable> {
private final IntWritable ONE = new IntWritable(1);
private Text text = new Text();
-
+
public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
String val = new String(value.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr1")));
text.set(val); // we can only emit Writables...
@@ -1112,7 +1203,7 @@ public static class MyMapper extends TableMapper<Text, IntWritable> {
In the reducer, the "ones" are counted (just like any other MR example that does this), and then a Put is emitted.
public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
-
+
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int i = 0;
for (IntWritable val : values) {
@@ -1131,17 +1222,17 @@ public static class MyTableReducer extends TableReducer<Text, IntWritable, Im
HBase MapReduce Summary to File ExampleThis is very similar to the summary example above, with the exception that it uses HBase as a MapReduce source
but HDFS as the sink. The differences are in the job setup and in the reducer. The mapper remains the same.
-
+
Configuration config = HBaseConfiguration.create();
Job job = new Job(config,"ExampleSummaryToFile");
job.setJarByClass(MySummaryFileJob.class); // class that contains mapper and reducer
-
+
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
// set other scan attrs
-
+
TableMapReduceUtil.initTableMapperJob(
sourceTable, // input table
scan, // Scan instance to control CF and attribute selection
@@ -1152,22 +1243,22 @@ TableMapReduceUtil.initTableMapperJob(
job.setReducerClass(MyReducer.class); // reducer class
job.setNumReduceTasks(1); // at least one, adjust as required
FileOutputFormat.setOutputPath(job, new Path("/tmp/mr/mySummaryFile")); // adjust directories as required
-
+
boolean b = job.waitForCompletion(true);
if (!b) {
throw new IOException("error with job!");
-}
+}
- As stated above, the previous Mapper can run unchanged with this example.
+ As stated above, the previous Mapper can run unchanged with this example.
As for the Reducer, it is a "generic" Reducer instead of extending TableReducer and emitting Puts.
public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
-
+
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int i = 0;
for (IntWritable val : values) {
i += val.get();
- }
+ }
context.write(key, new IntWritable(i));
}
}
@@ -1176,11 +1267,11 @@ if (!b) {
HBase MapReduce Summary to HBase Without ReducerIt is also possible to perform summaries without a reducer - if you use HBase as the reducer.
-
+ An HBase target table would need to exist for the job summary. The HTable method incrementColumnValue
- would be used to atomically increment values. From a performance perspective, it might make sense to keep a Map
+ would be used to atomically increment values. From a performance perspective, it might make sense to keep a Map
of values with their counts to be incremented for each map-task, and make one update per key during the
- cleanup method of the mapper. However, your milage may vary depending on the number of rows to be processed and
+ cleanup method of the mapper. However, your mileage may vary depending on the number of rows to be processed and
unique keys.
In the end, the summary results are in HBase.
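A hedged sketch of such a mapper is shown below; the target table name "summaryTable", the column family, and the qualifiers are hypothetical.
public static class MyIncrementingMapper extends TableMapper<Text, IntWritable> {

  private Map<String, Long> counts = new HashMap<String, Long>();
  private HTable targetTable;

  public void setup(Context context) throws IOException {
    targetTable = new HTable(context.getConfiguration(), "summaryTable");  // hypothetical target table
  }

  public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
    String val = new String(value.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr1")));
    Long current = counts.get(val);
    counts.put(val, current == null ? 1L : current + 1L);   // accumulate in memory, nothing is emitted
  }

  public void cleanup(Context context) throws IOException {
    // one atomic increment per distinct value, rather than one per input row
    for (Map.Entry<String, Long> entry : counts.entrySet()) {
      targetTable.incrementColumnValue(Bytes.toBytes(entry.getKey()),
          Bytes.toBytes("cf"), Bytes.toBytes("count"), entry.getValue());
    }
    targetTable.close();
  }
}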
@@ -1192,41 +1283,41 @@ if (!b) {
to generate summaries directly to an RDBMS via a custom reducer. The setup method
can connect to an RDBMS (the connection information can be passed via custom parameters in the context) and the
cleanup method can close the connection.
-
+
It is critical to understand that the number of reducers for the job affects the summarization implementation, and
you'll have to design this into your reducer. Specifically, whether it is designed to run as a singleton (one reducer)
or multiple reducers. Neither is right or wrong, it depends on your use-case. Recognize that the more reducers that
- are assigned to the job, the more simultaneous connections to the RDBMS will be created - this will scale, but only to a point.
+ are assigned to the job, the more simultaneous connections to the RDBMS will be created - this will scale, but only to a point.
public static class MyRdbmsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
private Connection c = null;
-
+
public void setup(Context context) {
// create DB connection...
}
-
+
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
// do summarization
// in this example the keys are Text, but this is just an example
}
-
+
public void cleanup(Context context) {
// close db connection
}
-
+
}
In the end, the summary results are written to your RDBMS table/s.
-
+
Accessing Other HBase Tables in a MapReduce JobAlthough the framework currently allows one HBase table as input to a
- MapReduce job, other HBase tables can
+ MapReduce job, other HBase tables can
be accessed as lookup tables, etc., in a
MapReduce job by creating an HTable instance in the setup method of the Mapper.
public class MyMapper extends TableMapper<Text, LongWritable> {
@@ -1235,12 +1326,12 @@ if (!b) {
public void setup(Context context) {
myOtherTable = new HTable("myOtherTable");
}
-
+
public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
// process Result...
// use 'myOtherTable' for lookups
}
-
+
@@ -1253,10 +1344,13 @@ if (!b) {
map-tasks which will double-write your data to HBase; this is probably
not what you want.
+ See for more information.
+
-
+
+
Architecture
@@ -1264,24 +1358,24 @@ if (!b) {
NoSQL?HBase is a type of "NoSQL" database. "NoSQL" is a general term meaning that the database isn't an RDBMS which
- supports SQL as it's primary access language, but there are many types of NoSQL databases: BerkeleyDB is an
+ supports SQL as its primary access language, but there are many types of NoSQL databases: BerkeleyDB is an
example of a local NoSQL database, whereas HBase is very much a distributed database. Technically speaking,
HBase is really more a "Data Store" than "Data Base" because it lacks many of the features you find in an RDBMS,
such as typed columns, secondary indexes, triggers, and advanced query languages, etc.
However, HBase has many features which support both linear and modular scaling. HBase clusters expand
- by adding RegionServers that are hosted on commodity class servers. If a cluster expands from 10 to 20
+ by adding RegionServers that are hosted on commodity class servers. If a cluster expands from 10 to 20
RegionServers, for example, it doubles both in terms of storage and processing capacity.
RDBMS can scale well, but only up to a point - specifically, the size of a single database server - and for the best
performance requires specialized hardware and storage devices. HBase features of note are:
- Strongly consistent reads/writes: HBase is not an "eventually consistent" DataStore. This
+ Strongly consistent reads/writes: HBase is not an "eventually consistent" DataStore. This
makes it very suitable for tasks such as high-speed counter aggregation. Automatic sharding: HBase tables are distributed on the cluster via regions, and regions are
automatically split and re-distributed as your data grows.Automatic RegionServer failover
- Hadoop/HDFS Integration: HBase supports HDFS out of the box as it's distributed file system.
- MapReduce: HBase supports massively parallelized processing via MapReduce for using HBase as both
+ Hadoop/HDFS Integration: HBase supports HDFS out of the box as its distributed file system.
+ MapReduce: HBase supports massively parallelized processing via MapReduce for using HBase as both
source and sink.Java Client API: HBase supports an easy to use Java API for programmatic access.Thrift/REST API: HBase also supports Thrift and REST for non-Java front-ends.
@@ -1289,12 +1383,12 @@ if (!b) {
Operational Management: HBase provides built-in web-pages for operational insight as well as JMX metrics.
-
-
+
+
When Should I Use HBase?HBase isn't suitable for every problem.
- First, make sure you have enough data. If you have hundreds of millions or billions of rows, then
+ First, make sure you have enough data. If you have hundreds of millions or billions of rows, then
HBase is a good candidate. If you only have a few thousand/million rows, then using a traditional RDBMS
might be a better choice due to the fact that all of your data might wind up on a single node (or two) and
the rest of the cluster may be sitting idle.
@@ -1302,7 +1396,7 @@ if (!b) {
Second, make sure you can live without all the extra features that an RDBMS provides (e.g., typed columns,
secondary indexes, transactions, advanced query languages, etc.) An application built against an RDBMS cannot be
"ported" to HBase by simply changing a JDBC driver, for example. Consider moving from an RDBMS to HBase as a
- complete redesign as opposed to a port.
+ complete redesign as opposed to a port.
Third, make sure you have enough hardware. Even HDFS doesn't do well with anything less than
5 DataNodes (due to things such as HDFS block replication which has a default of 3), plus a NameNode.
@@ -1313,9 +1407,9 @@ if (!b) {
What Is The Difference Between HBase and Hadoop/HDFS?
- HDFS is a distributed file system that is well suited for the storage of large files.
- It's documentation states that it is not, however, a general purpose file system, and does not provide fast individual record lookups in files.
- HBase, on the other hand, is built on top of HDFS and provides fast record lookups (and updates) for large tables.
+ HDFS is a distributed file system that is well suited for the storage of large files.
+ Its documentation states that it is not, however, a general purpose file system, and does not provide fast individual record lookups in files.
+ HBase, on the other hand, is built on top of HDFS and provides fast record lookups (and updates) for large tables.
This can sometimes be a point of conceptual confusion. HBase internally puts your data in indexed "StoreFiles" that exist
on HDFS for high-speed lookups. See the and the rest of this chapter for more information on how HBase achieves its goals.
@@ -1324,19 +1418,19 @@ if (!b) {
Catalog Tables
- The catalog tables -ROOT- and .META. exist as HBase tables. They are are filtered out
+ The catalog tables -ROOT- and .META. exist as HBase tables. They are filtered out
of the HBase shell's list command, but they are in fact tables just like any other.
ROOT
- -ROOT- keeps track of where the .META. table is. The -ROOT- table structure is as follows:
+ -ROOT- keeps track of where the .META. table is. The -ROOT- table structure is as follows:
- Key:
+ Key:
.META. region key (.META.,,1)
- Values:
+ Values:
info:regioninfo (serialized HRegionInfo
instance of .META.)
@@ -1347,14 +1441,14 @@ if (!b) {
META
- The .META. table keeps a list of all regions in the system. The .META. table structure is as follows:
+ The .META. table keeps a list of all regions in the system. The .META. table structure is as follows:
- Key:
+ Key:
Region key of the format ([table],[region start key],[region id])
- Values:
+ Values:
info:regioninfo (serialized
HRegionInfo instance for this region)
@@ -1363,12 +1457,12 @@ if (!b) {
info:serverstartcode (start-time of the RegionServer process containing this region)
- When a table is in the process of splitting two other columns will be created, info:splitA and info:splitB
+ When a table is in the process of splitting, two other columns will be created, info:splitA and info:splitB
which represent the two daughter regions. The values for these columns are also serialized HRegionInfo instances.
After the region has been split, this row will eventually be deleted.
Notes on HRegionInfo: the empty key is used to denote table start and table end. A region with an empty start key
- is the first region in a table. If region has both an empty start and an empty end key, its the only region in the table
+ is the first region in a table. If a region has both an empty start and an empty end key, it is the only region in the table.
In the (hopefully unlikely) event that programmatic processing of catalog metadata is required, see the
Writables utility.
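As a hedged illustration (assuming 'result' is a Result obtained by scanning .META., and using the catalog family/qualifier constants from HConstants):
byte[] bytes = result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
HRegionInfo regionInfo = Writables.getHRegionInfo(bytes);
System.out.println(regionInfo.getRegionNameAsString());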
@@ -1380,9 +1474,9 @@ if (!b) {
For information on region-RegionServer assignment, see .
-
+
-
+
ClientThe HBase client
@@ -1398,7 +1492,7 @@ if (!b) {
need not go through the lookup process. Should a region be reassigned
either by the master load balancer or because a RegionServer has died,
the client will requery the catalog tables to determine the new
- location of the user region.
+ location of the user region.
See for more information about the impact of the Master on HBase Client
communication.
@@ -1406,10 +1500,11 @@ if (!b) {
Administrative functions are handled through HBaseAdmin
Connections
- For connection configuration information, see .
+ For connection configuration information, see .
- HTable
-instances are not thread-safe. When creating HTable instances, it is advisable to use the same HBaseConfiguration
+ HTable
+ instances are not thread-safe. Only one thread should use an instance of HTable at any given
+ time. When creating HTable instances, it is advisable to use the same HBaseConfiguration
instance. This will ensure sharing of ZooKeeper and socket instances to the RegionServers
which is usually what you want. For example, this is preferred:
HBaseConfiguration conf = HBaseConfiguration.create();
@@ -1425,7 +1520,19 @@ HTable table2 = new HTable(conf2, "myTable");Connection PoolingFor applications which require high-end multithreaded access (e.g., web-servers or application servers that may serve many application threads
- in a single JVM), see HTablePool.
+ in a single JVM), one solution is HTablePool.
+ But as written currently, it is difficult to control client resource consumption when using HTablePool.
+
+
+ Another solution is to precreate an HConnection using
+ // Create a connection to the cluster.
+HConnection connection = HConnectionManager.createConnection(conf);  // 'conf' is a Configuration created via HBaseConfiguration.create()
+HTableInterface table = connection.getTable("myTable");
+// use table as needed, the table returned is lightweight
+table.close();
+// use the connection for other access to the cluster
+connection.close();
+ Constructing an HTableInterface implementation is very lightweight, and resources are controlled/shared if you go this route.
@@ -1436,9 +1543,9 @@ HTable table2 = new HTable(conf2, "myTable");
is filled. The writebuffer is 2MB by default. Before an HTable instance is
discarded, either close() or
flushCommits() should be invoked so Puts
- will not be lost.
-
- Note: htable.delete(Delete); does not go in the writebuffer! This only applies to Puts.
+ will not be lost.
+
+ Note: htable.delete(Delete); does not go in the writebuffer! This only applies to Puts.
For additional information on write durability, review the ACID semantics page.
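For illustration, a minimal, hedged sketch of buffered writes (the table and column names are placeholders):
HTable table = new HTable(conf, "myTable");
table.setAutoFlush(false);                    // enable client-side write buffering
table.setWriteBufferSize(2 * 1024 * 1024);    // 2MB, which is the default

Put put = new Put(Bytes.toBytes("row1"));
put.add(Bytes.toBytes("cf"), Bytes.toBytes("attr1"), Bytes.toBytes("value1"));
table.put(put);        // buffered in the client, not yet sent to the RegionServer

table.flushCommits();  // push the buffered Puts; close() would also flush
table.close();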
@@ -1456,15 +1563,15 @@ HTable table2 = new HTable(conf2, "myTable");
in the client API; however, they are discouraged because, if not managed properly, these can
lock up the RegionServers.
- There is an oustanding ticket HBASE-2332 to
+ There is an outstanding ticket HBASE-2332 to
remove this feature from the client.
-
+
Client Request FiltersGet and Scan instances can be
- optionally configured with filters which are applied on the RegionServer.
+ optionally configured with filters which are applied on the RegionServer.
Filters can be confusing because there are many different types, and it is best to approach them by understanding the groups
of Filter functionality.
@@ -1473,8 +1580,8 @@ HTable table2 = new HTable(conf2, "myTable");
Structural Filters contain other Filters.FilterListFilterList
- represents a list of Filters with a relationship of FilterList.Operator.MUST_PASS_ALL or
- FilterList.Operator.MUST_PASS_ONE between the Filters. The following example shows an 'or' between two
+ represents a list of Filters with a relationship of FilterList.Operator.MUST_PASS_ALL or
+ FilterList.Operator.MUST_PASS_ONE between the Filters. The following example shows an 'or' between two
Filters (checking for either 'my value' or 'my other value' on the same attribute).
FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ONE);
@@ -1521,7 +1628,7 @@ scan.setFilter(filter);
RegexStringComparatorRegexStringComparator
- supports regular expressions for value comparisons.
+ supports regular expressions for value comparisons.
RegexStringComparator comp = new RegexStringComparator("my."); // any value that starts with 'my'
SingleColumnValueFilter filter = new SingleColumnValueFilter(
@@ -1532,7 +1639,7 @@ SingleColumnValueFilter filter = new SingleColumnValueFilter(
);
scan.setFilter(filter);
- See the Oracle JavaDoc for supported RegEx patterns in Java.
+ See the Oracle JavaDoc for supported RegEx patterns in Java.
SubstringComparator
@@ -1663,36 +1770,40 @@ rs.close();
RowKeyRowFilter
- It is generally a better idea to use the startRow/stopRow methods on Scan for row selection, however
+ It is generally a better idea to use the startRow/stopRow methods on Scan for row selection, however
RowFilter can also be used.UtilityFirstKeyOnlyFilter
- This is primarily used for rowcount jobs.
+ This is primarily used for rowcount jobs.
See FirstKeyOnlyFilter.
-
+
MasterHMaster is the implementation of the Master Server. The Master server
is responsible for monitoring all RegionServer instances in the cluster, and is
- the interface for all metadata changes. In a distributed cluster, the Master typically runs on the .
+ the interface for all metadata changes. In a distributed cluster, the Master typically runs on the
+ J Mohamed Zahoor goes into some more detail on the Master Architecture in this blog posting, HBase HMaster Architecture
+ .
+ Startup BehaviorIf run in a multi-Master environment, all Masters compete to run the cluster. If the active
- Master loses it's lease in ZooKeeper (or the Master shuts down), then then the remaining Masters jostle to
+ Master loses its lease in ZooKeeper (or the Master shuts down), then the remaining Masters jostle to
take over the Master role.
Runtime ImpactA common dist-list question is what happens to an HBase cluster when the Master goes down. Because the
- HBase client talks directly to the RegionServers, the cluster can still function in a "steady
+ HBase client talks directly to the RegionServers, the cluster can still function in a "steady
state." Additionally, per ROOT and META exist as HBase tables (i.e., are
- not resident in the Master). However, the Master controls critical functions such as RegionServer failover and
- completing region splits. So while the cluster can still run for a time without the Master,
- the Master should be restarted as soon as possible.
+ not resident in the Master). However, the Master controls critical functions such as RegionServer failover and
+ completing region splits. So while the cluster can still run for a time without the Master,
+ the Master should be restarted as soon as possible.
Interface
@@ -1700,20 +1811,20 @@ rs.close();
Table (createTable, modifyTable, removeTable, enable, disable)
- ColumnFamily (addColumn, modifyColumn, removeColumn)
+ ColumnFamily (addColumn, modifyColumn, removeColumn)
Region (move, assign, unassign)
- For example, when the HBaseAdmin method disableTable is invoked, it is serviced by the Master server.
+ For example, when the HBaseAdmin method disableTable is invoked, it is serviced by the Master server.
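For illustration, a minimal hedged sketch ("myTable" is a placeholder table name):
HBaseAdmin admin = new HBaseAdmin(conf);
admin.disableTable("myTable");   // serviced by the Master
// ... e.g., alter the table's schema here ...
admin.enableTable("myTable");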
ProcessesThe Master runs several background threads:
LoadBalancer
- Periodically, and when there are not any regions in transition,
- a load balancer will run and move regions around to balance cluster load.
+ Periodically, and when there are no regions in transition,
+ a load balancer will run and move regions around to balance the cluster's load.
See for configuring this property.See for more information on region assignment.
@@ -1726,18 +1837,18 @@ rs.close();
RegionServerHRegionServer is the RegionServer implementation. It is responsible for serving and managing regions.
- In a distributed cluster, a RegionServer runs on a .
+ In a distributed cluster, a RegionServer runs on a .
InterfaceThe methods exposed by HRegionInterface contain both data-oriented and region-maintenance methods:
Data (get, put, delete, next, etc.)
- Region (splitRegion, compactRegion, etc.)
+ Region (splitRegion, compactRegion, etc.)
For example, when the HBaseAdmin method majorCompact is invoked on a table, the client is actually iterating through
- all regions for the specified table and requesting a major compaction directly to each region.
+ all regions for the specified table and requesting a major compaction directly to each region.
Processes
@@ -1761,7 +1872,7 @@ rs.close();
posted. Documentation will eventually move to this reference guide, but the blog is the most current information available at this time.
-
+
Block Cache
@@ -1849,9 +1960,9 @@ rs.close();
PurposeEach RegionServer adds updates (Puts, Deletes) to its write-ahead log (WAL)
- first, and then to the for the affected .
- This ensures that HBase has durable writes. Without WAL, there is the possibility of data loss in the case of a RegionServer failure
- before each MemStore is flushed and new StoreFiles are written. HLog
+ first, and then to the for the affected .
+ This ensures that HBase has durable writes. Without WAL, there is the possibility of data loss in the case of a RegionServer failure
+ before each MemStore is flushed and new StoreFiles are written. HLog
is the HBase WAL implementation, and there is one HLog instance per RegionServer.
The WAL is in HDFS in /hbase/.logs/ with subdirectories per region.
@@ -1875,11 +1986,11 @@ rs.close();
hbase.hlog.split.skip.errors
- When set to true, the default, any error
+ When set to true, any error
encountered splitting will be logged, the problematic WAL will be
moved into the .corrupt directory under the hbase
rootdir, and processing will continue. If set to
- false, the exception will be propagated and the
+ false, the default, the exception will be propagated and the
split logged as failed.See HBASE-2958
@@ -1912,10 +2023,10 @@ rs.close();
RegionsRegions are the basic element of availability and
- distribution for tables, and are comprised of a Store per Column Family. The heirarchy of objects
+ distribution for tables, and are comprised of a Store per Column Family. The hierarchy of objects
is as follows:
-Table (HBase table)
+Table (HBase table)
Region (Regions for the table)
Store (Store per ColumnFamily for each Region for the table)
MemStore (MemStore for each Store for each Region for the table)
@@ -1924,7 +2035,7 @@ rs.close();
For a description of what HBase files look like when written to HDFS, see .
-
+
Region Size
@@ -1936,13 +2047,13 @@ rs.close();
HBase scales by having regions across many servers. Thus if
you have 2 regions for 16GB data, on a 20 node cluster your data
will be concentrated on just a few machines - nearly the entire
- cluster will be idle. This really cant be stressed enough, since a
- common problem is loading 200MB data into HBase then wondering why
+ cluster will be idle. This really can't be stressed enough, since a
+ common problem is loading 200MB data into HBase then wondering why
your awesome 10 node cluster isn't doing anything.
- On the other hand, high region count has been known to make things slow.
+ On the other hand, high region count has been known to make things slow.
This is getting better with each release of HBase, but it is probably better to have
700 regions than 3000 for the same amount of data.
@@ -1953,7 +2064,7 @@ rs.close();
- When starting off, its probably best to stick to the default region-size, perhaps going
+ When starting off, it's probably best to stick to the default region-size, perhaps going
smaller for hot tables (or manually split hot regions to spread the load over
the cluster), or go with larger region sizes if your cell sizes tend to be
largish (100k and up).
@@ -1977,10 +2088,10 @@ rs.close();
If the region assignment is still valid (i.e., if the RegionServer is still online)
then the assignment is kept.
- If the assignment is invalid, then the LoadBalancerFactory is invoked to assign the
+ If the assignment is invalid, then the LoadBalancerFactory is invoked to assign the
region. The DefaultLoadBalancer will randomly assign the region to a RegionServer.
- META is updated with the RegionServer assignment (if needed) and the RegionServer start codes
+ META is updated with the RegionServer assignment (if needed) and the RegionServer start codes
(start time of the RegionServer process) upon region opening by the RegionServer.
@@ -1996,7 +2107,7 @@ rs.close();
The Master will detect that the RegionServer has failed.
The region assignments will be considered invalid and will be re-assigned just
- like the startup sequence.
+ like the startup sequence.
@@ -2023,14 +2134,14 @@ rs.close();
Third replica is written to a node in another rack (if sufficient nodes)
- Thus, HBase eventually achieves locality for a region after a flush or a compaction.
+ Thus, HBase eventually achieves locality for a region after a flush or a compaction.
In a RegionServer failover situation a RegionServer may be assigned regions with non-local
StoreFiles (because none of the replicas are local), however as new data is written
in the region, or the table is compacted and StoreFiles are re-written, they will become "local"
- to the RegionServer.
+ to the RegionServer.
For more information, see HDFS Design on Replica Placement
- and also Lars George's blog on HBase and HDFS locality.
+ and also Lars George's blog on HBase and HDFS locality.
@@ -2048,7 +2159,7 @@ rs.close();
The default split policy can be overwritten using a custom RegionSplitPolicy (HBase 0.94+).
Typically a custom split policy should extend HBase's default split policy: ConstantSizeRegionSplitPolicy.
- The policy can set globally through the HBaseConfiguration used or on a per table basis:
+ The policy can be set globally through the HBaseConfiguration used or on a per table basis:
HTableDescriptor myHtd = ...;
myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName());
@@ -2064,8 +2175,8 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
MemStoreThe MemStore holds in-memory modifications to the Store. Modifications are KeyValues.
- When asked to flush, current memstore is moved to snapshot and is cleared.
- HBase continues to serve edits out of new memstore and backing snapshot until flusher reports in that the
+ When asked to flush, the current memstore is moved to a snapshot and is cleared.
+ HBase continues to serve edits out of the new memstore and the backing snapshot until the flusher reports that the
flush succeeded. At this point the snapshot is let go.
@@ -2076,7 +2187,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
The hfile file format is based on
the SSTable file described in the BigTable [2006] paper and on
Hadoop's tfile
- (The unit test suite and the compression harness were taken directly from tfile).
+ (The unit test suite and the compression harness were taken directly from tfile).
Schubert Zhang's blog post on HFile: A Block-Indexed File Format to Store Sorted Key-Value Pairs makes for a thorough introduction to HBase's hfile. Matteo Bertozzi has also put up a
helpful description, HBase I/O: HFile.
@@ -2103,7 +2214,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
-
+
BlocksStoreFiles are composed of blocks. The blocksize is configured on a per-ColumnFamily basis.
@@ -2116,7 +2227,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
KeyValueThe KeyValue class is the heart of data storage in HBase. KeyValue wraps a byte array and takes offsets and lengths into the passed array
- at where to start interpreting the content as KeyValue.
+ which specify where to start interpreting the content as a KeyValue.
The KeyValue format inside a byte array is:
@@ -2180,7 +2291,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
CompactionThere are two types of compactions: minor and major. Minor compactions will usually pick up a couple of the smaller adjacent
StoreFiles and rewrite them as one. Minors do not drop deletes or expired cells, only major compactions do this. Sometimes a minor compaction
- will pick up all the StoreFiles in the Store and in this case it actually promotes itself to being a major compaction.
+ will pick up all the StoreFiles in the Store and in this case it actually promotes itself to being a major compaction.
After a major compaction runs there will be a single StoreFile per Store, and this will usually improve performance. Caution: major compactions rewrite all of the Store's data and on a loaded system, this may not be tenable;
major compactions will usually have to be done manually on large systems. See .
@@ -2189,7 +2300,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
Compaction File Selection
- To understand the core algorithm for StoreFile selection, there is some ASCII-art in the Store source code that
+ To understand the core algorithm for StoreFile selection, there is some ASCII-art in the Store source code that
will serve as a useful reference. It has been copied below:
/* normal skew:
@@ -2211,16 +2322,16 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
hbase.hstore.compaction.min (.90 hbase.hstore.compactionThreshold) (files) Minimum number
of StoreFiles per Store to be selected for a compaction to occur (default 2).hbase.hstore.compaction.max (files) Maximum number of StoreFiles to compact per minor compaction (default 10).
- hbase.hstore.compaction.min.size (bytes)
- Any StoreFile smaller than this setting with automatically be a candidate for compaction. Defaults to
+ hbase.hstore.compaction.min.size (bytes)
+ Any StoreFile smaller than this setting will automatically be a candidate for compaction. Defaults to
hbase.hregion.memstore.flush.size (128 mb).
- hbase.hstore.compaction.max.size (.92) (bytes)
+ hbase.hstore.compaction.max.size (.92) (bytes)
Any StoreFile larger than this setting will automatically be excluded from compaction (default Long.MAX_VALUE). The minor compaction StoreFile selection logic is size based, and selects a file for compaction when the file
<= sum(smaller_files) * hbase.hstore.compaction.ratio.
-
+ Minor Compaction File Selection - Example #1 (Basic Example)
@@ -2228,21 +2339,21 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
hbase.store.compaction.ratio = 1.0f hbase.hstore.compaction.min = 3 (files) >
- hbase.hstore.compaction.max = 5 (files) >
+ hbase.hstore.compaction.max = 5 (files) >
hbase.hstore.compaction.min.size = 10 (bytes) >
hbase.hstore.compaction.max.size = 1000 (bytes) >
The following StoreFiles exist: 100, 50, 23, 12, and 12 bytes apiece (oldest to newest).
With the above parameters, the files that would be selected for minor compaction are 23, 12, and 12.
-
+
Why?
100 --> No, because sum(50, 23, 12, 12) * 1.0 = 97. 50 --> No, because sum(23, 12, 12) * 1.0 = 47. 23 --> Yes, because sum(12, 12) * 1.0 = 24.
- 12 --> Yes, because the previous file has been included, and because this
+ 12 --> Yes, because the previous file has been included, and because this
does not exceed the max-file limit of 5.
- 12 --> Yes, because the previous file had been included, and because this
+ 12 --> Yes, because the previous file had been included, and because this
does not exceed the max-file limit of 5.
@@ -2253,19 +2364,19 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
hbase.store.compaction.ratio = 1.0f hbase.hstore.compaction.min = 3 (files) >
- hbase.hstore.compaction.max = 5 (files) >
+ hbase.hstore.compaction.max = 5 (files) >
hbase.hstore.compaction.min.size = 10 (bytes) >
hbase.hstore.compaction.max.size = 1000 (bytes) >
-
+
The following StoreFiles exist: 100, 25, 12, and 12 bytes apiece (oldest to newest).
- With the above parameters, the files that would be selected for minor compaction are 23, 12, and 12.
-
+ With the above parameters, no compaction will be started.
+
Why?
100 --> No, because sum(25, 12, 12) * 1.0 = 47. 25 --> No, because sum(12, 12) * 1.0 = 24.
- 12 --> No. Candidate because sum(12) * 1.0 = 12, there are only 2 files to compact and that is less than the threshold of 3
12 --> No. Candidate because sum(12) * 1.0 = 12, there are only 2 files to compact and that is less than the threshold of 3. 12 --> No. Candidate because the previous StoreFile was, but there are not enough files to compact.
@@ -2276,13 +2387,13 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
hbase.store.compaction.ratio = 1.0f hbase.hstore.compaction.min = 3 (files) >
- hbase.hstore.compaction.max = 5 (files) >
+ hbase.hstore.compaction.max = 5 (files) >
hbase.hstore.compaction.min.size = 10 (bytes) >
hbase.hstore.compaction.max.size = 1000 (bytes) >
The following StoreFiles exist: 7, 6, 5, 4, 3, 2, and 1 bytes apiece (oldest to newest).
- With the above parameters, the files that would be selected for minor compaction are 7, 6, 5, 4, 3.
-
+ With the above parameters, the files that would be selected for minor compaction are 7, 6, 5, 4, 3.
+
Why?
7 --> Yes, because sum(6, 5, 4, 3, 2, 1) * 1.0 = 21. Also, 7 is less than the min-size
@@ -2303,74 +2414,126 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
hbase.hstore.compaction.min.size. Because
this limit represents the "automatic include" limit for all StoreFiles smaller than this value, this value may need to
be adjusted downwards in write-heavy environments where many 1 or 2 mb StoreFiles are being flushed, because every file
- will be targeted for compaction and the resulting files may still be under the min-size and require further compaction, etc.
+ will be targeted for compaction and the resulting files may still be under the min-size and require further compaction, etc.
-
-
- Bloom Filters
- Bloom filters were developed over in HBase-1200
- Add bloomfilters.
- For description of the development process -- why static blooms
- rather than dynamic -- and for an overview of the unique properties
- that pertain to blooms in HBase, as well as possible future
- directions, see the Development Process section
- of the document BloomFilters
- in HBase attached to HBase-1200.
-
- The bloom filters described here are actually version two of
- blooms in HBase. In versions up to 0.19.x, HBase had a dynamic bloom
- option based on work done by the European Commission One-Lab
- Project 034819. The core of the HBase bloom work was later
- pulled up into Hadoop to implement org.apache.hadoop.io.BloomMapFile.
- Version 1 of HBase blooms never worked that well. Version 2 is a
- rewrite from scratch though again it starts with the one-lab
- work.
-
- See also and .
-
-
-
- Bloom StoreFile footprint
- Bloom filters add an entry to the StoreFile
- general FileInfo data structure and then two
- extra entries to the StoreFile metadata
- section.
-
-
- BloomFilter in the StoreFile
- FileInfo data structure
+
- FileInfo has a
- BLOOM_FILTER_TYPE entry which is set to
- NONE, ROW or
- ROWCOL.
+ Bulk Loading
+ Overview
+
+ HBase includes several methods of loading data into tables.
+ The most straightforward method is to either use the TableOutputFormat
+ class from a MapReduce job, or use the normal client APIs; however,
+ these are not always the most efficient methods.
+
+
+ The bulk load feature uses a MapReduce job to output table data in HBase's internal
+ data format, and then directly loads the generated StoreFiles into a running
+ cluster. Using bulk load will use less CPU and network resources than
+ simply using the HBase API.
+
+
+ Bulk Load Architecture
+
+ The HBase bulk load process consists of two main steps.
+
+ Preparing data via a MapReduce job
+
+ The first step of a bulk load is to generate HBase data files (StoreFiles) from
+ a MapReduce job using HFileOutputFormat. This output format writes
+ out data in HBase's internal storage format so that it can be
+ later loaded very efficiently into the cluster.
+
+
+ In order to function efficiently, HFileOutputFormat must be
+ configured such that each output HFile fits within a single region.
+ In order to do this, jobs whose output will be bulk loaded into HBase
+ use Hadoop's TotalOrderPartitioner class to partition the map output
+ into disjoint ranges of the key space, corresponding to the key
+ ranges of the regions in the table.
+
+
+ HFileOutputFormat includes a convenience function,
+ configureIncrementalLoad(), which automatically sets up
+ a TotalOrderPartitioner based on the current region boundaries of a
+ table.
+
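+ As a hedged sketch of how such a job might be wired up (the driver and mapper class names, table name, and output path are placeholders; the mapper is assumed to emit ImmutableBytesWritable/Put pairs):
+Configuration config = HBaseConfiguration.create();
+Job job = new Job(config, "BulkLoadPrepare");
+job.setJarByClass(MyBulkLoadJob.class);        // placeholder driver class
+job.setMapperClass(MyBulkLoadMapper.class);    // placeholder mapper emitting ImmutableBytesWritable/Put
+job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+job.setMapOutputValueClass(Put.class);
+
+HTable table = new HTable(config, "mytable");
+// configures the partitioner, reducer and output format from the table's current region boundaries
+HFileOutputFormat.configureIncrementalLoad(job, table);
+FileOutputFormat.setOutputPath(job, new Path("/user/todd/myoutput"));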
-
-
- BloomFilter entries in StoreFile
- metadata
-
- BLOOM_FILTER_META holds Bloom Size, Hash
- Function used, etc. Its small in size and is cached on
- StoreFile.Reader load
- BLOOM_FILTER_DATA is the actual bloomfilter
- data. Obtained on-demand. Stored in the LRU cache, if it is enabled
- (Its enabled by default).
+ Completing the data load
+
+ After the data has been prepared using
+ HFileOutputFormat, it is loaded into the cluster using
+ completebulkload. This command line tool iterates
+ through the prepared data files, and for each one determines the
+ region the file belongs to. It then contacts the appropriate Region
+ Server which adopts the HFile, moving it into its storage directory
+ and making the data available to clients.
+
+
+ If the region boundaries have changed during the course of bulk load
+ preparation, or between the preparation and completion steps, the
+ completebulkload utility will automatically split the
+ data files into pieces corresponding to the new boundaries. This
+ process is not optimally efficient, so users should take care to
+ minimize the delay between preparing a bulk load and importing it
+ into the cluster, especially if other clients are simultaneously
+ loading data through other means.
+
-
-
-
-
+ Importing the prepared data using the completebulkload tool
+
+ After a data import has been prepared, either by using the
+ importtsv tool with the
+ "importtsv.bulk.output" option or by some other MapReduce
+ job using the HFileOutputFormat, the
+ completebulkload tool is used to import the data into the
+ running cluster.
+
+
+ The completebulkload tool simply takes the output path
+ where importtsv or your MapReduce job put its results, and
+ the table name to import into. For example:
+
+ $ hadoop jar hbase-VERSION.jar completebulkload [-c /path/to/hbase/config/hbase-site.xml] /user/todd/myoutput mytable
+
+ The -c config-file option can be used to specify a file
+ containing the appropriate hbase parameters (e.g., hbase-site.xml) if
+ not supplied already on the CLASSPATH (In addition, the CLASSPATH must
+ contain the directory that has the zookeeper configuration file if
+ zookeeper is NOT managed by HBase).
+
+
+ Note: If the target table does not already exist in HBase, this
+ tool will create the table automatically.
+
+ This tool will run quickly, after which point the new data will be visible in
+ the cluster.
+
+
+ See Also
+ For more information about the referenced utilities, see and .
+
+
+ Advanced Usage
+
+ Although the importtsv tool is useful in many cases, advanced users may
+ want to generate data programmatically, or import data from other formats. To get
+ started doing so, dig into ImportTsv.java and check the JavaDoc for
+ HFileOutputFormat.
+
+
+ The import step of the bulk load can also be done programmatically. See the
+ LoadIncrementalHFiles class for more information.
+
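+ For instance, a minimal hedged sketch of the programmatic import (the output path and table name are placeholders):
+Configuration config = HBaseConfiguration.create();
+LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
+HTable table = new HTable(config, "mytable");
+// moves the prepared HFiles under the given path into the table's regions
+loader.doBulkLoad(new Path("/user/todd/myoutput"), table);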
+
+
+
HDFSAs HBase runs on HDFS (and each StoreFile is written as a file on HDFS),
it is important to have an understanding of the HDFS Architecture
@@ -2389,15 +2552,18 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
for more information.
-
-
+
+
-
+
+
+
+ FAQ
@@ -2427,6 +2593,21 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
+
+ How can I find examples of NoSQL/HBase?
+
+ See the link to the BigTable paper in the appendix, as
+ well as the other papers.
+
+
+
+
+ What is the history of HBase?
+
+ See .
+
+
+ Architecture
@@ -2541,7 +2722,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
- EC2 issues are a special case. See Troubleshooting and Performance sections.
+ EC2 issues are a special case. See Troubleshooting and Performance sections.
@@ -2581,6 +2762,214 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
+
+ hbck In Depth
+ HBaseFsck (hbck) is a tool for checking for region consistency and table integrity problems
+and repairing a corrupted HBase. It works in two basic modes -- a read-only inconsistency
+identifying mode and a multi-phase read-write repair mode.
+
+
+ Running hbck to identify inconsistencies
+To check to see if your HBase cluster has corruptions, run hbck against your HBase cluster:
+
+$ ./bin/hbase hbck
+
+
+At the end of the command's output it prints OK or tells you the number of INCONSISTENCIES
+present. You may also want to run hbck a few times because some inconsistencies can be
+transient (e.g. the cluster is starting up or a region is splitting). Operationally you may want to run
+hbck regularly and set up an alert (e.g. via nagios) if it repeatedly reports inconsistencies.
+A run of hbck will report a list of inconsistencies along with a brief description of the regions and
+tables affected. Using the -details option will report more details, including a representative
+listing of all the splits present in all the tables.
+
+
+$ ./bin/hbase hbck -details
+
+If you just want to know if some tables are corrupted, you can limit hbck to identify inconsistencies
+in only specific tables. For example, the following command would only attempt to check tables
+TableFoo and TableBar. The benefit is that hbck will run in less time.
+
+$ ./bin/hbase hbck TableFoo TableBar
+
+
+ Inconsistencies
+
+ If after several runs, inconsistencies continue to be reported, you may have encountered a
+corruption. These should be rare, but in the event they occur newer versions of HBase include
+the hbck tool enabled with automatic repair options.
+
+
+ There are two invariants that when violated create inconsistencies in HBase:
+
+
+ HBase’s region consistency invariant is satisfied if every region is assigned and
+deployed on exactly one region server, and all places where this state is kept are in
+accordance.
+
+ HBase’s table integrity invariant is satisfied if for each table, every possible row key
+resolves to exactly one region.
+
+
+
+Repairs generally work in three phases -- a read-only information gathering phase that identifies
+inconsistencies, a table integrity repair phase that restores the table integrity invariant, and then
+finally a region consistency repair phase that restores the region consistency invariant.
+Starting from version 0.90.0, hbck could detect region consistency problems and report on a subset
+of possible table integrity problems. It also included the ability to automatically fix the most
+common inconsistency, region assignment and deployment consistency problems. This repair
+could be done by using the -fix command line option. These fixes close regions if they are
+open on the wrong server or on multiple region servers, and also assign regions to region
+servers if they are not open.
+
+
+Starting from HBase versions 0.90.7, 0.92.2 and 0.94.0, several new command line options are
+introduced to aid repairing a corrupted HBase. This hbck sometimes goes by the nickname
+“uberhbck”. Each particular version of uberhbck is compatible with HBase versions of the same
+major version (0.90.7 uberhbck can repair a 0.90.4). However, versions <=0.90.6 and versions
+<=0.92.1 may require restarting the master or failing over to a backup master.
+
+
+ Localized repairs
+
+ When repairing a corrupted HBase, it is best to repair the lowest risk inconsistencies first.
+These are generally region consistency repairs -- localized single region repairs, that only modify
+in-memory data, ephemeral zookeeper data, or patch holes in the META table.
+Region consistency requires that the HBase instance has the state of the region’s data in HDFS
+(.regioninfo files), the region’s row in the .META. table, and the region’s deployment/assignments on
+region servers and the master in accordance. Options for repairing region consistency include:
+
+ -fixAssignments (equivalent to the 0.90 -fix option) repairs unassigned, incorrectly
+assigned or multiply assigned regions.
+
+ -fixMeta which removes meta rows when corresponding regions are not present in
+HDFS and adds new meta rows if the regions are present in HDFS while not in META.
+
+
+ To fix deployment and assignment problems you can run this command:
+
+
+$ ./bin/hbase hbck -fixAssignments
+
+To fix deployment and assignment problems as well as repairing incorrect meta rows you can
+run this command:
+
+$ ./bin/hbase hbck -fixAssignments -fixMeta
+
+There are a few classes of table integrity problems that are low risk repairs. The first two are
+degenerate (startkey == endkey) regions and backwards regions (startkey > endkey). These are
+automatically handled by sidelining the data to a temporary directory (/hbck/xxxx).
+The third low-risk class is hdfs region holes. This can be repaired by using the:
+
+ -fixHdfsHoles option for fabricating new empty regions on the file system.
+If holes are detected you can use -fixHdfsHoles and should include -fixMeta and -fixAssignments to make the new region consistent.
+
+
+
+$ ./bin/hbase hbck -fixAssignments -fixMeta -fixHdfsHoles
+
+Since this is a common operation, we’ve added the -repairHoles flag that is equivalent to the
+previous command:
+
+$ ./bin/hbase hbck -repairHoles
+
+If inconsistencies still remain after these steps, you most likely have table integrity problems
+related to orphaned or overlapping regions.
+
+ Region Overlap Repairs
+Table integrity problems can require repairs that deal with overlaps. This is a riskier operation
+because it requires modifications to the file system, requires some decision making, and may
+require some manual steps. For these repairs it is best to analyze the output of a hbck -details
+run so that you isolate repair attempts only to the problems the checks identify. Because this is
+riskier, there are safeguards that should be used to limit the scope of the repairs.
+WARNING: This feature is relatively new and has only been tested on online but idle HBase instances
+(no reads/writes). Use at your own risk in an active production environment!
+The options for repairing table integrity violations include:
+
+ -fixHdfsOrphans option for “adopting” a region directory that is missing a region
+metadata file (the .regioninfo file).
+
+ -fixHdfsOverlaps ability for fixing overlapping regions
+
+
+When repairing overlapping regions, a region’s data can be modified on the file system in two
+ways: 1) by merging regions into a larger region or 2) by sidelining regions by moving data to
+“sideline” directory where data could be restored later. Merging a large number of regions is
+technically correct but could result in an extremely large region that requires a series of costly
+compactions and splitting operations. In these cases, it is probably better to sideline the regions
+that overlap with the most other regions (likely the largest ranges) so that merges can happen on
+a more reasonable scale. Since these sidelined regions are already laid out in HBase’s native
+directory and HFile format, they can be restored by using HBase’s bulk load mechanism.
+The default safeguard thresholds are conservative. These options let you override the default
+thresholds and enable the large region sidelining feature.
+
+ -maxMerge <n> maximum number of overlapping regions to merge
+
+ -sidelineBigOverlaps if more than maxMerge regions are overlapping, attempt
+to sideline the regions overlapping with the most other regions.
+
+ -maxOverlapsToSideline <n> if sidelining large overlapping regions, sideline at most n
+regions.
+
+
+
+Since oftentimes you just want to get the tables repaired, you can use this option to turn
+on all repair options:
+
+ -repair includes all the region consistency options and only the hole repairing table
+integrity options.
+
+
+Finally, there are safeguards to limit repairs to only specific tables. For example the following
+command would only attempt to check and repair tables TableFoo and TableBar.
+
+$ ./bin/hbase hbck -repair TableFoo TableBar
+
+ Special cases: Meta is not properly assigned
+There are a few special cases that hbck can handle as well.
+Sometimes the meta table’s only region is inconsistently assigned or deployed. In this case
+there is a special -fixMetaOnly option that can try to fix meta assignments.
+
+$ ./bin/hbase hbck -fixMetaOnly -fixAssignments
+
+
+ Special cases: HBase version file is missing
+HBase’s data on the file system requires a version file in order to start. If this file is missing, you
+can use the -fixVersionFile option to fabricate a new HBase version file. This assumes that
+the version of hbck you are running is the appropriate version for the HBase cluster.
+
+ Special case: Root and META are corrupt.
+The most drastic corruption scenario is the case where the ROOT or META is corrupted and
+HBase will not start. In this case you can use the OfflineMetaRepair tool to create new ROOT
+and META regions and tables.
+This tool assumes that HBase is offline. It then marches through the existing HBase home
+directory, loads as much information from region metadata files (.regioninfo files) as possible
+from the file system. If the region metadata has proper table integrity, it sidelines the original root
+and meta table directories, and builds new ones with pointers to the region directories and their
+data.
+
+$ ./bin/hbase org.apache.hadoop.hbase.util.hbck.OfflineMetaRepair
+
+NOTE: This tool is not as clever as uberhbck but can be used to bootstrap repairs that uberhbck
+can complete.
+If the tool succeeds you should be able to start hbase and run online repairs if necessary.
+
+ Special cases: Offline split parent
+
+Once a region is split, the offline parent will be cleaned up automatically. Sometimes, daughter regions
+are split again before their parents are cleaned up. HBase can clean up parents in the right order. However,
+there can sometimes be lingering offline split parents: they are present in META and in HDFS, but are not
+deployed, and HBase cannot clean them up. In this case, you can use the -fixSplitParents option to reset
+them in META to be online and not split, so that hbck can merge them with other regions if the option for
+fixing overlapping regions is used.
+
+
+This option should not normally be used, and it is not in -fixAll.
+
+
+
+
+
Compression In HBaseCompression
@@ -2589,9 +2978,15 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
CompressionTest Tool
HBase includes a tool to test compression is set up properly.
- To run it, type /bin/hbase org.apache.hadoop.hbase.util.CompressionTest.
+ To run it, type /bin/hbase org.apache.hadoop.hbase.util.CompressionTest.
This will emit usage on how to run the tool.
+ You need to restart the regionserver for it to pick up fixed codecs!
+ Be aware that the regionserver caches the result of the compression check it runs
+ ahead of each region open. This means
+ that you will have to restart the regionserver for it to notice that you have fixed
+ any codec issues.
+
@@ -2607,7 +3002,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
hbase.regionserver.codecs
to your hbase-site.xml with a value of
- codecs to test on startup. For example if the
+ codecs to test on startup. For example if the
hbase.regionserver.codecs
value is lzo,gz and if lzo is not present
@@ -2668,7 +3063,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName(
Build and install snappy on all nodes
- of your cluster.
+ of your cluster (see below)
@@ -2689,15 +3084,54 @@ hbase> describe 't1'
-
+
+
+ Installation
+
+
+ You will find the snappy library file under the .libs directory from your Snappy build (for example
+ /home/hbase/snappy-1.0.5/.libs/). The file is called libsnappy.so.1.x.x, where 1.x.x is the version of the snappy
+ code you are building. You can either copy this file into your hbase directory under the name libsnappy.so, or simply
+ create a symbolic link to it.
+
+
+
+ The second file you need is the hadoop native library. You will find this file in your hadoop installation directory
+ under lib/native/Linux-amd64-64/ or lib/native/Linux-i386-32/. The file you are looking for is libhadoop.so.1.x.x.
+ Again, you can simply copy this file or link to it, under the name libhadoop.so.
+
+
+ At the end of the installation, you should have both the libsnappy.so and libhadoop.so links or files present in
+ lib/native/Linux-amd64-64 or in lib/native/Linux-i386-32.
+
+ To point hbase at snappy support, in hbase-env.sh set
+ export HBASE_LIBRARY_PATH=/pathtoyourhadoop/lib/native/Linux-amd64-64
+ In /pathtoyourhadoop/lib/native/Linux-amd64-64 you should have something like:
+
+ libsnappy.a
+ libsnappy.so
+ libsnappy.so.1
+ libsnappy.so.1.1.2
+
+
+
+
+
+ Changing Compression Schemes
+ A frequent question on the dist-list is how to change compression schemes for ColumnFamilies. This is actually quite simple,
+ and can be done via an alter command. Because the compression scheme is encoded at the block-level in StoreFiles, the table does
+ not need to be re-created and the data does not need to be copied somewhere else. Just make sure
+ the old codec is still available until you are sure that all of the old StoreFiles have been compacted.
+
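+
+ For illustration, below is a hedged sketch of the same change made programmatically with the Java
+ admin API of this HBase generation rather than the shell alter command. The table name t1, the
+ column family cf, and the choice of SNAPPY are placeholders, and the disable/enable steps may be
+ unnecessary if online schema changes are enabled in your deployment.
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.io.hfile.Compression;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class ChangeCompressionExample {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = HBaseConfiguration.create();
+    HBaseAdmin admin = new HBaseAdmin(conf);
+    // Fetch the existing column family descriptor so its other settings are preserved.
+    HTableDescriptor htd = admin.getTableDescriptor(Bytes.toBytes("t1"));
+    HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes("cf"));
+    hcd.setCompressionType(Compression.Algorithm.SNAPPY);
+    admin.disableTable("t1");      // may be unnecessary where online schema change is enabled
+    admin.modifyColumn("t1", hcd); // programmatic equivalent of the shell alter
+    admin.enableTable("t1");
+    admin.majorCompact("t1");      // rewrites the old StoreFiles with the new codec
+    admin.close();
+  }
+}
+
+ As noted above, keep the old codec installed until you are sure all of the old StoreFiles have been compacted.
+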
+ YCSB: The Yahoo! Cloud Serving Benchmark and HBaseTODO: Describe how YCSB is poor for putting up a decent cluster load.TODO: Describe setup of YCSB for HBase
- Ted Dunning redid YCSB so its mavenized and added facility for verifying workloads. See Ted Dunning's YCSB.
+ Ted Dunning redid YCSB so it's mavenized and added a facility for verifying workloads. See Ted Dunning's YCSB.
@@ -2719,7 +3153,7 @@ hbase> describe 't1'
HFile Version 1
- HFile Version 1
+ HFile Version 1
@@ -2762,7 +3196,7 @@ hbase> describe 't1'
HFile Version 2
- HFile Version 2
+ HFile Version 2
@@ -2791,7 +3225,7 @@ hbase> describe 't1'
- META – meta blocks (not used for Bloom filters in version 2 anymore)
+ META – meta blocks (not used for Bloom filters in version 2 anymore)
@@ -2816,7 +3250,7 @@ hbase> describe 't1'
- TRAILER – a fixed>size file trailer. As opposed to the above, this is not an
+ TRAILER – a fixed-size file trailer. As opposed to the above, this is not an
HFile v2 block but a fixed>size (for each HFile version) data structure
@@ -2831,7 +3265,7 @@ hbase> describe 't1'
Compressed size of the block's data, not including the header (int).
-Can be used for skipping the current data block when scanning HFile data.
+Can be used for skipping the current data block when scanning HFile data.
@@ -2961,12 +3395,12 @@ This offset may point to a data block or to a deeper>level index block.
-Offset of the block referenced by this entry in the file (long)
+Offset of the block referenced by this entry in the file (long)
-On>disk size of the referenced block (int)
+On-disk size of the referenced block (int)
@@ -3207,6 +3641,8 @@ Comparator class used for Bloom filter keys, a UTF>8 encoded string stored usi
HBase Wiki has a page with a number of presentations.
+ HBase RefCard from DZone.
+ HBase BooksHBase: The Definitive Guide by Lars George.
@@ -3216,16 +3652,29 @@ Comparator class used for Bloom filter keys, a UTF>8 encoded string stored usi
Hadoop: The Definitive Guide by Tom White.
-
+
+
+
+ HBase History
+
+ 2006: BigTable paper published by Google.
+
+ 2006 (end of year): HBase development starts.
+
+ 2008: HBase becomes Hadoop sub-project.
+
+ 2010: HBase becomes Apache top-level project.
+
+ HBase and the Apache Software FoundationHBase is a project in the Apache Software Foundation and as such there are responsibilities to the ASF to ensure
a healthy project.ASF Development Process
- See the Apache Development Process page
+ See the Apache Development Process page
for all sorts of information on how the ASF is structured (e.g., PMC, committers, contributors), to tips on contributing
- and getting involved, and how open-source works at ASF.
+ and getting involved, and how open-source works at ASF.
ASF Board Reporting
@@ -3235,6 +3684,67 @@ Comparator class used for Bloom filter keys, a UTF>8 encoded string stored usi
+ Enabling Dapper-like Tracing in HBase
+HBASE-6449 added support
+for tracing requests through HBase, using the open source tracing library,
+HTrace. Setting up tracing is quite simple;
+however, it currently requires some very minor changes to your client code (it would not be very difficult to remove this requirement).
+
+SpanReceivers
+The tracing system works by collecting information in structs called ‘Spans’.
+It is up to you to choose how you want to receive this information by implementing the
+SpanReceiver interface, which defines one method:
+public void receiveSpan(Span span);
+This method serves as a callback whenever a span is completed. HTrace allows you to use
+as many SpanReceivers as you want so you can easily send trace information to multiple destinations.
+
+
+Configure which SpanReceivers you’d like to use by putting a comma-separated list of the
+fully-qualified class names of classes implementing SpanReceiver in the
+hbase-site.xml property hbase.trace.spanreceiver.classes.
+
+
+HBase includes an HBaseLocalFileSpanReceiver that writes all span
+information to local files in a JSON-based format. The HBaseLocalFileSpanReceiver
+looks in hbase-site.xml for a hbase.trace.spanreceiver.localfilespanreceiver.filename
+property with a value describing the name of the file to which nodes should write their span information.
+
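+
+For illustration, here is a hedged hbase-site.xml sketch wiring up the two properties named above.
+The class name and the file path below are placeholders only; substitute the fully-qualified name of
+the receiver class from your HBase build (for example the included HBaseLocalFileSpanReceiver) and a
+path of your choosing.
+
+<property>
+  <name>hbase.trace.spanreceiver.classes</name>
+  <value>fully.qualified.name.of.HBaseLocalFileSpanReceiver</value>
+</property>
+<property>
+  <name>hbase.trace.spanreceiver.localfilespanreceiver.filename</name>
+  <value>/tmp/htrace-spans.json</value>
+</property>
+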
+
+If you do not want to use the included HBaseLocalFileSpanReceiver,
+you are encouraged to write your own receiver (take a look at HBaseLocalFileSpanReceiver
+for an example). If you think others would benefit from your receiver, file a JIRA or send a pull request to
+HTrace.
+
+
+
+Client Modifications
+Currently, you must turn on tracing in your client code. To do this, you simply turn on tracing for
+requests you think are interesting, and turn it off when the request is done.
+
+
+For example, if you wanted to trace all of your get operations, you would change this:
+HTable table = new HTable(...);
+Get get = new Get(...);
+
+into:
+
+Span getSpan = Trace.startSpan("doing get", Sampler.ALWAYS);
+try {
+ HTable table = new HTable(...);
+ Get get = new Get(...);
+...
+} finally {
+ getSpan.stop();
+}
+
+If you wanted to trace half of your ‘get’ operations, you would pass in:
+new ProbabilitySampler(0.5) in lieu of Sampler.ALWAYS to Trace.startSpan().
+See the HTrace README for more information on Samplers.
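+
+For example, a hedged variation of the snippet above that samples roughly half of the traced get
+requests (assuming ProbabilitySampler comes from the same HTrace library as Sampler and Trace):
+
+Span getSpan = Trace.startSpan("doing get", new ProbabilitySampler(0.5));
+try {
+  HTable table = new HTable(...);
+  Get get = new Get(...);
+...
+} finally {
+  getSpan.stop();
+}
+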
+
+
+
+
+
Index
diff --git a/src/docbkx/case_studies.xml b/src/docbkx/case_studies.xml
new file mode 100644
index 000000000000..2e3bba0432f8
--- /dev/null
+++ b/src/docbkx/case_studies.xml
@@ -0,0 +1,324 @@
+
+
+
+ Apache HBase (TM) Case Studies
+
+ Overview
+ This chapter will describe a variety of performance and troubleshooting case studies that can
+ provide a useful blueprint on diagnosing Apache HBase (TM) cluster issues.
+ For more information on Performance and Troubleshooting, see and .
+
+
+
+
+ Schema Design
+
+
+ List Data
+ The following is an exchange from the user dist-list regarding a fairly common question:
+ how to handle per-user list data in Apache HBase.
+
+ *** QUESTION ***
+
+ We're looking at how to store a large amount of (per-user) list data in
+HBase, and we were trying to figure out what kind of access pattern made
+the most sense. One option is store the majority of the data in a key, so
+we could have something like:
+
+
+
+<FixedWidthUserName><FixedWidthValueId1>:"" (no value)
+<FixedWidthUserName><FixedWidthValueId2>:"" (no value)
+<FixedWidthUserName><FixedWidthValueId3>:"" (no value)
+
+
+The other option we had was to do this entirely using:
+
+<FixedWidthUserName><FixedWidthPageNum0>:<FixedWidthLength><FixedIdNextPageNum><ValueId1><ValueId2><ValueId3>...
+<FixedWidthUserName><FixedWidthPageNum1>:<FixedWidthLength><FixedIdNextPageNum><ValueId1><ValueId2><ValueId3>...
+
+
+where each row would contain multiple values.
+So in one case reading the first thirty values would be:
+
+
+scan { STARTROW => 'FixedWidthUsername' LIMIT => 30}
+
+And in the second case it would be
+
+get 'FixedWidthUserName\x00\x00\x00\x00'
+
+
+The general usage pattern would be to read only the first 30 values of
+these lists, with infrequent access reading deeper into the lists. Some
+users would have <= 30 total values in these lists, and some users would
+have millions (i.e. power-law distribution)
+
+
+ The single-value format seems like it would take up more space on HBase,
+but would offer some improved retrieval / pagination flexibility. Would
+there be any significant performance advantages to be able to paginate via
+gets vs paginating with scans?
+
+
+ My initial understanding was that doing a scan should be faster if our
+paging size is unknown (and caching is set appropriately), but that gets
+should be faster if we'll always need the same page size. I've ended up
+hearing different people tell me opposite things about performance. I
+assume the page sizes would be relatively consistent, so for most use cases
+we could guarantee that we only wanted one page of data in the
+fixed-page-length case. I would also assume that we would have infrequent
+updates, but may have inserts into the middle of these lists (meaning we'd
+need to update all subsequent rows).
+
+
+Thanks for help / suggestions / follow-up questions.
+
+ *** ANSWER ***
+
+If I understand you correctly, you're ultimately trying to store
+triples in the form "user, valueid, value", right? E.g., something
+like:
+
+
+"user123, firstname, Paul",
+"user234, lastname, Smith"
+
+
+(But the usernames are fixed width, and the valueids are fixed width).
+
+
+And, your access pattern is along the lines of: "for user X, list the
+next 30 values, starting with valueid Y". Is that right? And these
+values should be returned sorted by valueid?
+
+
+The tl;dr version is that you should probably go with one row per
+user+value, and not build a complicated intra-row pagination scheme on
+your own unless you're really sure it is needed.
+
+
+Your two options mirror a common question people have when designing
+HBase schemas: should I go "tall" or "wide"? Your first schema is
+"tall": each row represents one value for one user, and so there are
+many rows in the table for each user; the row key is user + valueid,
+and there would be (presumably) a single column qualifier that means
+"the value". This is great if you want to scan over rows in sorted
+order by row key (thus my question above, about whether these ids are
+sorted correctly). You can start a scan at any user+valueid, read the
+next 30, and be done. What you're giving up is the ability to have
+transactional guarantees around all the rows for one user, but it
+doesn't sound like you need that. Doing it this way is generally
+recommended (see
+here http://hbase.apache.org/book.html#schema.smackdown).
+
+
+Your second option is "wide": you store a bunch of values in one row,
+using different qualifiers (where the qualifier is the valueid). The
+simple way to do that would be to just store ALL values for one user
+in a single row. I'm guessing you jumped to the "paginated" version
+because you're assuming that storing millions of columns in a single
+row would be bad for performance, which may or may not be true; as
+long as you're not trying to do too much in a single request, or do
+things like scanning over and returning all of the cells in the row,
+it shouldn't be fundamentally worse. The client has methods that allow
+you to get specific slices of columns.
+
+
+Note that neither case fundamentally uses more disk space than the
+other; you're just "shifting" part of the identifying information for
+a value either to the left (into the row key, in option one) or to the
+right (into the column qualifiers in option 2). Under the covers,
+every key/value still stores the whole row key, and column family
+name. (If this is a bit confusing, take an hour and watch Lars
+George's excellent video about understanding HBase schema design:
+http://www.youtube.com/watch?v=_HLoH_PgrLk).
+
+
+A manually paginated version has lots more complexities, as you note,
+like having to keep track of how many things are in each page,
+re-shuffling if new values are inserted, etc. That seems significantly
+more complex. It might have some slight speed advantages (or
+disadvantages!) at extremely high throughput, and the only way to
+really know that would be to try it out. If you don't have time to
+build it both ways and compare, my advice would be to start with the
+simplest option (one row per user+value). Start simple and iterate! :)
+
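+
+To make the recommended "tall" option above concrete, here is a hedged Java sketch of reading the
+first 30 values for one user. The table name, the row-key prefix user123, and printing the row key
+are placeholders for illustration; in practice you would also bound the scan so it stops at the end
+of that user's key range.
+
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class FirstThirtyValues {
+  public static void main(String[] args) throws IOException {
+    Configuration conf = HBaseConfiguration.create();
+    HTable table = new HTable(conf, "userlists");
+    Scan scan = new Scan(Bytes.toBytes("user123")); // start at this user's first valueid
+    scan.setCaching(30);                            // fetch a page's worth per RPC
+    ResultScanner scanner = table.getScanner(scan);
+    try {
+      int count = 0;
+      for (Result r : scanner) {
+        if (++count > 30) break;                    // read the first 30 rows (user + valueid each)
+        System.out.println(Bytes.toString(r.getRow()));
+      }
+    } finally {
+      scanner.close();
+      table.close();
+    }
+  }
+}
+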
+
+
+
+
+
+
+
+ Performance/Troubleshooting
+
+
+ Case Study #1 (Performance Issue On A Single Node)
+ Scenario
+ Following a scheduled reboot, one data node began exhibiting unusual behavior. Routine MapReduce
+ jobs run against HBase tables which regularly completed in five or six minutes began taking 30 or 40 minutes
+ to finish. These jobs were consistently found to be waiting on map and reduce tasks assigned to the troubled data node
+ (e.g., the slow map tasks all had the same Input Split).
+ The situation came to a head during a distributed copy, when the copy was severely prolonged by the lagging node.
+
+
+ Hardware
+ Datanodes:
+
+ Two 12-core processors
+ Six Enterprise SATA disks
+ 24GB of RAM
+ Two bonded gigabit NICs
+
+
+ Network:
+
+ 10 Gigabit top-of-rack switches
+ 20 Gigabit bonded interconnects between racks.
+
+
+
+ Hypotheses
+ HBase "Hot Spot" Region
+ We hypothesized that we were experiencing a familiar point of pain: a "hot spot" region in an HBase table,
+ where uneven key-space distribution can funnel a huge number of requests to a single HBase region, bombarding the RegionServer
+ process and causing slow response time. Examination of the HBase Master status page showed that the number of HBase requests to the
+ troubled node was almost zero. Further, examination of the HBase logs showed that there were no region splits, compactions, or other region transitions
+ in progress. This effectively ruled out a "hot spot" as the root cause of the observed slowness.
+
+
+ HBase Region With Non-Local Data
+ Our next hypothesis was that one of the MapReduce tasks was requesting data from HBase that was not local to the datanode, thus
+ forcing HDFS to request data blocks from other servers over the network. Examination of the datanode logs showed that there were very
+ few blocks being requested over the network, indicating that the HBase region was correctly assigned, and that the majority of the necessary
+ data was located on the node. This ruled out the possibility of non-local data causing a slowdown.
+
+
+ Excessive I/O Wait Due To Swapping Or An Over-Worked Or Failing Hard Disk
+ After concluding that Hadoop and HBase were not likely to be the culprits, we moved on to troubleshooting the datanode's hardware.
+ Java, by design, will periodically scan its entire memory space to do garbage collection. If system memory is heavily overcommitted, the Linux
+ kernel may enter a vicious cycle, using up all of its resources swapping Java heap back and forth from disk to RAM as Java tries to run garbage
+ collection. Further, a failing hard disk will often retry reads and/or writes many times before giving up and returning an error. This can manifest
+ as high iowait, as running processes wait for reads and writes to complete. Finally, a disk nearing the upper edge of its performance envelope will
+ begin to cause iowait as it informs the kernel that it cannot accept any more data, and the kernel queues incoming data into the dirty write pool in memory.
+ However, using vmstat(1) and free(1), we could see that no swap was being used, and the amount of disk IO was only a few kilobytes per second.
+
+
+ Slowness Due To High Processor Usage
+ Next, we checked to see whether the system was performing slowly simply due to very high computational load. top(1) showed that the system load
+ was higher than normal, but vmstat(1) and mpstat(1) showed that the amount of processor being used for actual computation was low.
+
+
+ Network Saturation (The Winner)
+ Since neither the disks nor the processors were being utilized heavily, we moved on to the performance of the network interfaces. The datanode had two
+ gigabit ethernet adapters, bonded to form an active-standby interface. ifconfig(8) showed some unusual anomalies, namely interface errors, overruns, and framing errors.
+ While not unheard of, these kinds of errors are exceedingly rare on modern hardware which is operating as it should:
+
+$ /sbin/ifconfig bond0
+bond0 Link encap:Ethernet HWaddr 00:00:00:00:00:00
+inet addr:10.x.x.x Bcast:10.x.x.255 Mask:255.255.255.0
+UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1
+RX packets:2990700159 errors:12 dropped:0 overruns:1 frame:6 <--- Look Here! Errors!
+TX packets:3443518196 errors:0 dropped:0 overruns:0 carrier:0
+collisions:0 txqueuelen:0
+RX bytes:2416328868676 (2.4 TB) TX bytes:3464991094001 (3.4 TB)
+
+
+ These errors immediately led us to suspect that one or more of the ethernet interfaces might have negotiated the wrong line speed. This was confirmed both by running an ICMP ping
+ from an external host and observing round-trip-time in excess of 700ms, and by running ethtool(8) on the members of the bond interface and discovering that the active interface
+ was operating at 100Mb/s, full duplex.
+
+$ sudo ethtool eth0
+Settings for eth0:
+Supported ports: [ TP ]
+Supported link modes: 10baseT/Half 10baseT/Full
+ 100baseT/Half 100baseT/Full
+ 1000baseT/Full
+Supports auto-negotiation: Yes
+Advertised link modes: 10baseT/Half 10baseT/Full
+ 100baseT/Half 100baseT/Full
+ 1000baseT/Full
+Advertised pause frame use: No
+Advertised auto-negotiation: Yes
+Link partner advertised link modes: Not reported
+Link partner advertised pause frame use: No
+Link partner advertised auto-negotiation: No
+Speed: 100Mb/s <--- Look Here! Should say 1000Mb/s!
+Duplex: Full
+Port: Twisted Pair
+PHYAD: 1
+Transceiver: internal
+Auto-negotiation: on
+MDI-X: Unknown
+Supports Wake-on: umbg
+Wake-on: g
+Current message level: 0x00000003 (3)
+Link detected: yes
+
+
+ In normal operation, the ICMP ping round-trip time should be around 20ms, and the interface speed and duplex should read "1000Mb/s" and "Full", respectively.
+
+
+
+ Resolution
+ After determining that the active ethernet adapter was at the incorrect speed, we used the ifenslave(8) command to make the standby interface
+ the active interface, which yielded an immediate improvement in MapReduce performance, and a 10 times improvement in network throughput.
+
+ On the next trip to the datacenter, we determined that the line speed issue was ultimately caused by a bad network cable, which was replaced.
+
+
+
+
+ Case Study #2 (Performance Research 2012)
+ Investigation results of a self-described "we're not sure what's wrong, but it seems slow" problem.
+ http://gbif.blogspot.com/2012/03/hbase-performance-evaluation-continued.html
+
+
+
+
+ Case Study #3 (Performance Research 2010)
+
+ Investigation results of general cluster performance from 2010. Although this research is on an older version of the codebase, this writeup
+ is still very useful in terms of approach.
+ http://hstack.org/hbase-performance-testing/
+
+
+
+
+ Case Study #4 (xcievers Config)
+ Case study of configuring xceivers, and diagnosing errors from mis-configurations.
+ http://www.larsgeorge.com/2012/03/hadoop-hbase-and-xceivers.html
+
+ See also .
+
+
+
+
+
+
diff --git a/src/docbkx/community.xml b/src/docbkx/community.xml
new file mode 100644
index 000000000000..2c09908aed98
--- /dev/null
+++ b/src/docbkx/community.xml
@@ -0,0 +1,109 @@
+
+
+
+ Community
+
+ Decisions
+
+ Feature Branches
+ Feature Branches are easy to make. You do not have to be a committer to make one. Just request that the name of your branch be added to JIRA up on the
+ developer's mailing list and a committer will add it for you. Thereafter you can file issues against your feature branch in Apache HBase (TM) JIRA. You keep your code
+ elsewhere -- it should be public so it can be observed -- and you can update the dev mailing list on progress. When the feature is ready for commit,
+ 3 +1s from committers will get your feature merged. See HBase, mail # dev - Thoughts about large feature dev branches
+
+
+
+ Patch +1 Policy
+
+The below policy is something we put in place 09/2012. It is a
+suggested policy rather than a hard requirement. We want to try it
+first to see if it works before we cast it in stone.
+
+
+Apache HBase is made of
+components.
+Components have one or more owners. See the 'Description' field on the
+components
+JIRA page for who the current owners are by component.
+
+
+Patches that fit within the scope of a single Apache HBase component require,
+at least, a +1 by one of the component's owners before commit. If
+owners are absent -- busy or otherwise -- two +1s by non-owners will
+suffice.
+
+
+Patches that span components need at least two +1s before they can be
+committed, preferably +1s by owners of components touched by the
+x-component patch (TODO: This needs tightening up but I think fine for
+first pass).
+
+
+Any -1 on a patch by anyone vetoes a patch; it cannot be committed
+until the justification for the -1 is addressed.
+
+
+
+
+ Community Roles
+
+ Component Owner
+
+Component owners are listed in the description field on this Apache HBase JIRA components
+page. The owners are listed in the 'Description' field rather than in the 'Component
+Lead' field because the latter only allows us to list one individual
+whereas it is encouraged that components have multiple owners.
+
+
+Owners are volunteers who are (usually, but not necessarily) expert in
+their component domain and may have an agenda on how they think their
+Apache HBase component should evolve.
+
+
+Duties include:
+
+
+
+Owners will try to review patches that land within their component's scope.
+
+
+
+
+If an owner has an agenda, they will publish their
+goals or the design toward which they are driving their component.
+
+
+
+
+
+If you would like to volunteer as a component owner, just write the
+dev list and we'll sign you up. Owners do not need to be committers.
+
+
+
+
diff --git a/src/docbkx/configuration.xml b/src/docbkx/configuration.xml
index 44936e19e98e..2d182fa6a600 100644
--- a/src/docbkx/configuration.xml
+++ b/src/docbkx/configuration.xml
@@ -26,14 +26,16 @@
* limitations under the License.
*/
-->
- Configuration
- This chapter is the Not-So-Quick start guide to HBase configuration.
- Please read this chapter carefully and ensure that all requirements have
+ Apache HBase (TM) Configuration
+ This chapter is the Not-So-Quick start guide to Apache HBase (TM) configuration. It goes
+ over system requirements, Hadoop setup, the different Apache HBase run modes, and the
+ various configurations in HBase. Please read this chapter carefully. At a minimum
+ ensure that all requirements have
been satisfied. Failure to do so will cause you (and us) grief debugging strange errors
and/or data loss.
-
+
- HBase uses the same configuration system as Hadoop.
+ Apache HBase uses the same configuration system as Apache Hadoop.
To configure a deploy, edit a file of environment variables
in conf/hbase-env.sh -- this configuration
is used mostly by the launcher shell scripts getting the cluster
@@ -55,17 +57,20 @@ to ensure well-formedness of your document after an edit session.
content of the conf directory to
all nodes of the cluster. HBase will not do this for you.
Use rsync.
-
+
+
+ Basic Prerequisites
+ This section lists required services and some required system configuration.
+
+
Java
-
- Just like Hadoop, HBase requires java 6 from Oracle. Usually
- you'll want to use the latest version available except the problematic
- u18 (u24 is the latest version as of this writing).
+ Just like Hadoop, HBase requires at least java 6 from
+ Oracle.
+
- Operating System
+ Operating Systemssh
@@ -73,14 +78,20 @@ to ensure well-formedness of your document after an edit session.
sshd must be running to use Hadoop's scripts to
manage remote Hadoop and HBase daemons. You must be able to ssh to all
nodes, including your local node, using passwordless login (Google
- "ssh passwordless login").
+ "ssh passwordless login"). If on mac osx, see the section,
+ SSH: Setting up Remote Desktop and Enabling Self-Login
+ on the hadoop wiki.
DNS
- HBase uses the local hostname to self-report it's IP address.
- Both forward and reverse DNS resolving should work.
+ HBase uses the local hostname to self-report its IP address.
+ Both forward and reverse DNS resolving must work in versions of
+ HBase previous to 0.92.0.
+ The hadoop-dns-checker tool can be used to verify
+ DNS is working correctly on the cluster. The project README file provides detailed instructions on usage.
+ If your machine has multiple interfaces, HBase will use the
interface that the primary hostname resolves to.
@@ -97,15 +108,7 @@ to ensure well-formedness of your document after an edit session.
Loopback IP
- HBase expects the loopback IP address to be 127.0.0.1. Ubuntu and some other distributions,
- for example, will default to 127.0.1.1 and this will cause problems for you.
-
- /etc/hosts should look something like this:
-
- 127.0.0.1 localhost
- 127.0.0.1 ubuntu.ubuntu-domain ubuntu
-
-
+ HBase expects the loopback IP address to be 127.0.0.1. See
@@ -132,7 +135,7 @@ to ensure well-formedness of your document after an edit session.
- HBase is a database. It uses a lot of files all at the same time.
+ Apache HBase is a database. It uses a lot of files all at the same time.
The default ulimit -n -- i.e. user file limit -- of 1024 on most *nix systems
is insufficient (On mac os x its 256). Any significant amount of loading will
lead you to .
@@ -141,9 +144,9 @@ to ensure well-formedness of your document after an edit session.
2010-04-06 03:04:37,542 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_-6935524980745310745_1391901
Do yourself a favor and change the upper bound on the
number of file descriptors. Set it to north of 10k. The math runs roughly as follows: per ColumnFamily
- there is at least one StoreFile and possibly up to 5 or 6 if the region is under load. Multiply the
+ there is at least one StoreFile and possibly up to 5 or 6 if the region is under load. Multiply the
average number of StoreFiles per ColumnFamily times the number of regions per RegionServer. For example, assuming
- that a schema had 3 ColumnFamilies per region with an average of 3 StoreFiles per ColumnFamily,
+ that a schema had 3 ColumnFamilies per region with an average of 3 StoreFiles per ColumnFamily,
and there are 100 regions per RegionServer, the JVM will open 3 * 3 * 100 = 900 file descriptors
(not counting open jar files, config files, etc.)
@@ -153,7 +156,7 @@ to ensure well-formedness of your document after an edit session.
See Jack Levin's major hdfs issues
note up on the user list.The requirement that a database requires upping of system limits
- is not peculiar to HBase. See for example the section
+ is not peculiar to Apache HBase. See for example the section
Setting Shell Limits for the Oracle User in
Short Guide to install Oracle 10 on Linux..
@@ -198,7 +201,7 @@ to ensure well-formedness of your document after an edit session.
Windows
- HBase has been little tested running on Windows. Running a
+ Apache HBase has been little tested running on Windows. Running a
production install of HBase on top of Windows is not
recommended.
@@ -206,32 +209,61 @@ to ensure well-formedness of your document after an edit session.
xlink:href="http://cygwin.com/">Cygwin to have a *nix-like
environment for the shell scripts. The full details are explained in
the Windows
- Installation guide. Also
+ Installation guide. Also
search our user mailing list to pick
up latest fixes figured by Windows users.
-
+
HadoopHadoop
- Please read all of this section
- Please read this section to the end. Up front we
- wade through the weeds of Hadoop versions. Later we talk of what you must do in HBase
- to make it work w/ a particular Hadoop version.
-
-
-
- HBase will lose data unless it is running on an HDFS that has a durable
- sync implementation. Hadoop 0.20.2, Hadoop 0.20.203.0, and Hadoop 0.20.204.0
- DO NOT have this attribute.
- Currently only Hadoop versions 0.20.205.x or any release in excess of this
- version -- this includes hadoop 1.0.0 -- have a working, durable sync
+ Selecting a Hadoop version is critical for your HBase deployment. The table below shows which versions of Hadoop are supported by various HBase versions. Based on the version of HBase, you should select the most appropriate version of Hadoop. We are not in the Hadoop distro selection business. You can use Hadoop distributions from Apache, or learn about vendor distributions of Hadoop at
+
+
+
+ Where
+
+ S = supported and tested,
+ X = not supported,
+ NT = it should run, but not tested enough.
+
+
+
+ Because HBase depends on Hadoop, it bundles an instance of the Hadoop jar under its lib directory. The bundled jar is ONLY for use in standalone mode. In distributed mode, it is critical that the version of Hadoop that is out on your cluster matches what is under HBase. Replace the hadoop jar found in the HBase lib directory with the hadoop jar you are running on your cluster to avoid version mismatch issues. Make sure you replace the jar in HBase everywhere on your cluster. Hadoop version mismatch issues have various manifestations, but often everything just looks like it is hung up.
+
+
+ Apache HBase 0.92 and 0.94
+ HBase 0.92 and 0.94 versions can work with Hadoop versions 0.20.205, 0.22.x, 1.0.x, and 1.1.x. HBase-0.94 can additionally work with Hadoop-0.23.x and 2.x, but you may have to recompile the code using the specific maven profile (see top level pom.xml).
+
+
+
+ Apache HBase 0.96
+ Apache HBase 0.96.0 requires Apache Hadoop 1.x at a minimum, and it can run equally well on hadoop-2.0.
+ As of Apache HBase 0.96.x, Apache Hadoop 1.0.x at least is required. We will no longer run properly on older Hadoops such as 0.20.205 or branch-0.20-append. Do not move to Apache HBase 0.96.x if you cannot upgrade your Hadoop. See HBase, mail # dev - DISCUSS: Have hbase require at least hadoop 1.0.0 in hbase 0.96.0?
+
+
+
+ Hadoop versions 0.20.x - 1.x
+
+ HBase will lose data unless it is running on an HDFS that has a durable
+ sync implementation. DO NOT use Hadoop 0.20.2, Hadoop 0.20.203.0, or Hadoop 0.20.204.0, which DO NOT have this attribute. Currently only Hadoop versions 0.20.205.x or any release in excess of this version -- this includes hadoop-1.0.0 -- have a working, durable sync.
- On Hadoop VersionsThe Cloudera blog post An update on Apache Hadoop 1.0
by Charles Zedlweski has a nice exposition on how all the Hadoop versions relate.
Its worth checking out if you are having trouble making sense of the
@@ -250,57 +282,18 @@ to ensure well-formedness of your document after an edit session.
You will have to restart your cluster after making this edit. Ignore the chicken-little
comment you'll find in the hdfs-default.xml in the
- description for the dfs.support.append configuration; it says it is not enabled because there
- are ... bugs in the 'append code' and is not supported in any production
- cluster.. This comment is stale, from another era, and while I'm sure there
- are bugs, the sync/append code has been running
- in production at large scale deploys and is on
- by default in the offerings of hadoop by commercial vendors
- Until recently only the
- branch-0.20-append
- branch had a working sync but no official release was ever made from this branch.
- You had to build it yourself. Michael Noll wrote a detailed blog,
- Building
- an Hadoop 0.20.x version for HBase 0.90.2, on how to build an
- Hadoop from branch-0.20-append. Recommended.
- Praveen Kumar has written
- a complimentary article,
- Building Hadoop and HBase for HBase Maven application development.
-Cloudera have dfs.support.append set to true by default..
-
-Or use the
- Cloudera or
- MapR distributions.
- Cloudera' CDH3
- is Apache Hadoop 0.20.x plus patches including all of the
- branch-0.20-append
- additions needed to add a durable sync. Use the released, most recent version of CDH3.
-
- MapR
- includes a commercial, reimplementation of HDFS.
- It has a durable sync as well as some other interesting features that are not
- yet in Apache Hadoop. Their M3
- product is free to use and unlimited.
-
-
- Because HBase depends on Hadoop, it bundles an instance of the
- Hadoop jar under its lib directory. The bundled jar is ONLY for use in standalone mode.
- In distributed mode, it is critical that the version of Hadoop that is out
- on your cluster match what is under HBase. Replace the hadoop jar found in the HBase
- lib directory with the hadoop jar you are running on
- your cluster to avoid version mismatch issues. Make sure you
- replace the jar in HBase everywhere on your cluster. Hadoop version
- mismatch issues have various manifestations but often all looks like
- its hung up.
-
+ description for the dfs.support.append configuration.
+
+
- Hadoop Security
- HBase will run on any Hadoop 0.20.x that incorporates Hadoop
- security features -- e.g. Y! 0.20S or CDH3B3 -- as long as you do as
+ Apache HBase on Secure Hadoop
+ Apache HBase will run on any Hadoop 0.20.x that incorporates Hadoop
+ security features as long as you do as
suggested above and replace the Hadoop jar that ships with HBase
- with the secure version.
+ with the secure version. If you want to read more about how to set up
+ Secure HBase, see .
-
+
dfs.datanode.max.xcieversxcievers
@@ -331,9 +324,12 @@ to ensure well-formedness of your document after an edit session.
java.io.IOException: No live nodes contain current block. Will get new
block locations from namenode and retry...
See Hadoop HDFS: Deceived by Xciever for an informative rant on xceivering.
+ See also
+
-
+
+
HBase run modes: Standalone and Distributed
@@ -376,7 +372,7 @@ to ensure well-formedness of your document after an edit session.
Distributed modes require an instance of the Hadoop
Distributed File System (HDFS). See the Hadoop
+ xlink:href="http://hadoop.apache.org/common/docs/r1.1.1/api/overview-summary.html#overview_description">
requirements and instructions for how to set up a HDFS. Before
proceeding, ensure you have an appropriate, working HDFS.
@@ -395,57 +391,92 @@ to ensure well-formedness of your document after an edit session.
HBase. Do not use this configuration for production nor for
evaluating HBase performance.
- Once you have confirmed your HDFS setup, edit
- conf/hbase-site.xml. This is the file into
+ First, set up your HDFS in pseudo-distributed mode.
+
+ Next, configure HBase. Below is an example conf/hbase-site.xml.
+ This is the file into
which you add local customizations and overrides for
- and . Point HBase at the running Hadoop HDFS
- instance by setting the hbase.rootdir property.
- This property points HBase at the Hadoop filesystem instance to use.
- For example, adding the properties below to your
- hbase-site.xml says that HBase should use the
- /hbase directory in the HDFS whose namenode is
- at port 8020 on your local machine, and that it should run with one
- replica only (recommended for pseudo-distributed mode):
+ and .
+ Note that the hbase.rootdir property points to the
+ local HDFS instance.
+
-
+ Now skip to for how to start and verify your
+ pseudo-distributed install.
+ See Pseudo-distributed
+ mode extras for notes on how to start extra Masters and
+ RegionServers when running pseudo-distributed.
+
+
+
+ Let HBase create the hbase.rootdir
+ directory. If you don't, you'll get a warning saying HBase needs a
+ migration run because the directory is missing files expected by
+ HBase (it'll create them if you let it).
+
+
+
+ Pseudo-distributed Configuration File
+ Below is a sample pseudo-distributed file for the node h-24-30.sfo.stumble.net.
+hbase-site.xml
+
<configuration>
...
<property>
<name>hbase.rootdir</name>
- <value>hdfs://localhost:8020/hbase</value>
- <description>The directory shared by RegionServers.
- </description>
+ <value>hdfs://h-24-30.sfo.stumble.net:8020/hbase</value>
</property>
<property>
- <name>dfs.replication</name>
- <value>1</value>
- <description>The replication count for HLog and HFile storage. Should not be greater than HDFS datanode count.
- </description>
+ <name>hbase.cluster.distributed</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>hbase.zookeeper.quorum</name>
+ <value>h-24-30.sfo.stumble.net</value>
</property>
...
</configuration>
+
-
- Let HBase create the hbase.rootdir
- directory. If you don't, you'll get warning saying HBase needs a
- migration run because the directory is missing files expected by
- HBase (it'll create them if you let it).
-
+
-
- Above we bind to localhost. This means
- that a remote client cannot connect. Amend accordingly, if you
- want to connect from a remote location.
-
+
+ Pseudo-distributed Extras
+
+
+ Startup
+ To start up the initial HBase cluster...
+ % bin/start-hbase.sh
+
+ To start up an extra backup master on the same server, run...
+ % bin/local-master-backup.sh start 1
+ ... the '1' means use ports 60001 & 60011, and this backup master's logfile will be at logs/hbase-${USER}-1-master-${HOSTNAME}.log.
+
+ To start up multiple backup masters, run... % bin/local-master-backup.sh start 2 3 You can start up to 9 backup masters (10 total).
+
+ To start up more regionservers...
+ % bin/local-regionservers.sh start 1
+ where '1' means use ports 60201 & 60301 and its logfile will be at logs/hbase-${USER}-1-regionserver-${HOSTNAME}.log.
+
+ To add 4 more regionservers in addition to the one you just started, run... % bin/local-regionservers.sh start 2 3 4 5
+ This supports up to 99 extra regionservers (100 total).
+
+
+
+ Stop
+ Assuming you want to stop master backup # 1, run...
+ % cat /tmp/hbase-${USER}-1-master.pid |xargs kill -9
+ Note that bin/local-master-backup.sh stop 1 will try to stop the cluster along with the master.
+
+ To stop an individual regionserver, run...
+ % bin/local-regionservers.sh stop 1
+
+
+
+
+
- Now skip to for how to start and verify your
- pseudo-distributed install.
- See Pseudo-distributed
- mode extras for notes on how to start extra Masters and
- RegionServers when running pseudo-distributed.
-
@@ -542,7 +573,7 @@ to ensure well-formedness of your document after an edit session.
Running and Confirming Your Installation
-
+
Make sure HDFS is running first. Start and stop the Hadoop HDFS
daemons by running bin/start-hdfs.sh over in the
@@ -552,31 +583,31 @@ to ensure well-formedness of your document after an edit session.
not normally use the mapreduce daemons. These do not need to be
started.
-
+
If you are managing your own ZooKeeper,
start it and confirm its running else, HBase will start up ZooKeeper
for you as part of its start process.
-
+
Start HBase with the following command:
-
+
bin/start-hbase.sh
- Run the above from the
+ Run the above from the
HBASE_HOME
- directory.
+ directory.
You should now have a running HBase instance. HBase logs can be
found in the logs subdirectory. Check them out
especially if HBase had trouble starting.
-
+
HBase also puts up a UI listing vital attributes. By default its
deployed on the Master host at port 60010 (HBase RegionServers listen
@@ -586,13 +617,13 @@ to ensure well-formedness of your document after an edit session.
Master's homepage you'd point your browser at
http://master.example.org:60010.
-
+
Once HBase has started, see the for how to
create tables, add data, scan your insertions, and finally disable and
drop your tables.
-
+
To stop HBase after exiting the HBase shell enter
$ ./bin/stop-hbase.sh
@@ -602,574 +633,15 @@ stopping hbase............... Shutdown can take a moment to
until HBase has shut down completely before stopping the Hadoop
daemons.
-
+
-
-
- ZooKeeper
- ZooKeeper
-
-
- A distributed HBase depends on a running ZooKeeper cluster.
- All participating nodes and clients need to be able to access the
- running ZooKeeper ensemble. HBase by default manages a ZooKeeper
- "cluster" for you. It will start and stop the ZooKeeper ensemble
- as part of the HBase start/stop process. You can also manage the
- ZooKeeper ensemble independent of HBase and just point HBase at
- the cluster it should use. To toggle HBase management of
- ZooKeeper, use the HBASE_MANAGES_ZK variable in
- conf/hbase-env.sh. This variable, which
- defaults to true, tells HBase whether to
- start/stop the ZooKeeper ensemble servers as part of HBase
- start/stop.
-
- When HBase manages the ZooKeeper ensemble, you can specify
- ZooKeeper configuration using its native
- zoo.cfg file, or, the easier option is to
- just specify ZooKeeper options directly in
- conf/hbase-site.xml. A ZooKeeper
- configuration option can be set as a property in the HBase
- hbase-site.xml XML configuration file by
- prefacing the ZooKeeper option name with
- hbase.zookeeper.property. For example, the
- clientPort setting in ZooKeeper can be changed
- by setting the
- hbase.zookeeper.property.clientPort property.
- For all default values used by HBase, including ZooKeeper
- configuration, see . Look for the
- hbase.zookeeper.property prefix
- For the full list of ZooKeeper configurations, see
- ZooKeeper's zoo.cfg. HBase does not ship
- with a zoo.cfg so you will need to browse
- the conf directory in an appropriate
- ZooKeeper download.
-
-
- You must at least list the ensemble servers in
- hbase-site.xml using the
- hbase.zookeeper.quorum property. This property
- defaults to a single ensemble member at
- localhost which is not suitable for a fully
- distributed HBase. (It binds to the local machine only and remote
- clients will not be able to connect).
- How many ZooKeepers should I run?
-
- You can run a ZooKeeper ensemble that comprises 1 node
- only but in production it is recommended that you run a
- ZooKeeper ensemble of 3, 5 or 7 machines; the more members an
- ensemble has, the more tolerant the ensemble is of host
- failures. Also, run an odd number of machines. In ZooKeeper,
- an even number of peers is supported, but it is normally not used
- because an even sized ensemble requires, proportionally, more peers
- to form a quorum than an odd sized ensemble requires. For example, an
- ensemble with 4 peers requires 3 to form a quorum, while an ensemble with
- 5 also requires 3 to form a quorum. Thus, an ensemble of 5 allows 2 peers to
- fail, and thus is more fault tolerant than the ensemble of 4, which allows
- only 1 down peer.
-
- Give each ZooKeeper server around 1GB of RAM, and if possible, its own
- dedicated disk (A dedicated disk is the best thing you can do
- to ensure a performant ZooKeeper ensemble). For very heavily
- loaded clusters, run ZooKeeper servers on separate machines
- from RegionServers (DataNodes and TaskTrackers).
-
-
- For example, to have HBase manage a ZooKeeper quorum on
- nodes rs{1,2,3,4,5}.example.com, bound to
- port 2222 (the default is 2181) ensure
- HBASE_MANAGE_ZK is commented out or set to
- true in conf/hbase-env.sh
- and then edit conf/hbase-site.xml and set
- hbase.zookeeper.property.clientPort and
- hbase.zookeeper.quorum. You should also set
- hbase.zookeeper.property.dataDir to other than
- the default as the default has ZooKeeper persist data under
- /tmp which is often cleared on system
- restart. In the example below we have ZooKeeper persist to
- /user/local/zookeeper.
- <configuration>
- ...
- <property>
- <name>hbase.zookeeper.property.clientPort</name>
- <value>2222</value>
- <description>Property from ZooKeeper's config zoo.cfg.
- The port at which the clients will connect.
- </description>
- </property>
- <property>
- <name>hbase.zookeeper.quorum</name>
- <value>rs1.example.com,rs2.example.com,rs3.example.com,rs4.example.com,rs5.example.com</value>
- <description>Comma separated list of servers in the ZooKeeper Quorum.
- For example, "host1.mydomain.com,host2.mydomain.com,host3.mydomain.com".
- By default this is set to localhost for local and pseudo-distributed modes
- of operation. For a fully-distributed setup, this should be set to a full
- list of ZooKeeper quorum servers. If HBASE_MANAGES_ZK is set in hbase-env.sh
- this is the list of servers which we will start/stop ZooKeeper on.
- </description>
- </property>
- <property>
- <name>hbase.zookeeper.property.dataDir</name>
- <value>/usr/local/zookeeper</value>
- <description>Property from ZooKeeper's config zoo.cfg.
- The directory where the snapshot is stored.
- </description>
- </property>
- ...
- </configuration>
-
-
- Using existing ZooKeeper ensemble
-
- To point HBase at an existing ZooKeeper cluster, one that
- is not managed by HBase, set HBASE_MANAGES_ZK
- in conf/hbase-env.sh to false
-
- ...
- # Tell HBase whether it should manage it's own instance of Zookeeper or not.
- export HBASE_MANAGES_ZK=false Next set ensemble locations
- and client port, if non-standard, in
- hbase-site.xml, or add a suitably
- configured zoo.cfg to HBase's
- CLASSPATH. HBase will prefer the
- configuration found in zoo.cfg over any
- settings in hbase-site.xml.
-
- When HBase manages ZooKeeper, it will start/stop the
- ZooKeeper servers as a part of the regular start/stop scripts.
- If you would like to run ZooKeeper yourself, independent of
- HBase start/stop, you would do the following
-
-
-${HBASE_HOME}/bin/hbase-daemons.sh {start,stop} zookeeper
-
-
- Note that you can use HBase in this manner to spin up a
- ZooKeeper cluster, unrelated to HBase. Just make sure to set
- HBASE_MANAGES_ZK to false
- if you want it to stay up across HBase restarts so that when
- HBase shuts down, it doesn't take ZooKeeper down with it.
-
- For more information about running a distinct ZooKeeper
- cluster, see the ZooKeeper Getting
- Started Guide. Additionally, see the ZooKeeper Wiki or the
- ZooKeeper documentation
- for more information on ZooKeeper sizing.
-
-
-
-
-
- SASL Authentication with ZooKeeper
- Newer releases of HBase (>= 0.92) will
- support connecting to a ZooKeeper Quorum that supports
- SASL authentication (which is available in Zookeeper
- versions 3.4.0 or later).
-
- This describes how to set up HBase to mutually
- authenticate with a ZooKeeper Quorum. ZooKeeper/HBase
- mutual authentication (HBASE-2418)
- is required as part of a complete secure HBase configuration
- (HBASE-3025).
-
- For simplicity of explication, this section ignores
- additional configuration required (Secure HDFS and Coprocessor
- configuration). It's recommended to begin with an
- HBase-managed Zookeeper configuration (as opposed to a
- standalone Zookeeper quorum) for ease of learning.
-
-
- Operating System Prerequisites
-
-
- You need to have a working Kerberos KDC setup. For
- each $HOST that will run a ZooKeeper
- server, you should have a principle
- zookeeper/$HOST. For each such host,
- add a service key (using the kadmin or
- kadmin.local tool's ktadd
- command) for zookeeper/$HOST and copy
- this file to $HOST, and make it
- readable only to the user that will run zookeeper on
- $HOST. Note the location of this file,
- which we will use below as
- $PATH_TO_ZOOKEEPER_KEYTAB.
-
-
-
- Similarly, for each $HOST that will run
- an HBase server (master or regionserver), you should
- have a principle: hbase/$HOST. For each
- host, add a keytab file called
- hbase.keytab containing a service
- key for hbase/$HOST, copy this file to
- $HOST, and make it readable only to the
- user that will run an HBase service on
- $HOST. Note the location of this file,
- which we will use below as
- $PATH_TO_HBASE_KEYTAB.
-
-
-
- Each user who will be an HBase client should also be
- given a Kerberos principal. This principal should
- usually have a password assigned to it (as opposed to,
- as with the HBase servers, a keytab file) which only
- this user knows. The client's principal's
- maxrenewlife should be set so that it can
- be renewed enough so that the user can complete their
- HBase client processes. For example, if a user runs a
- long-running HBase client process that takes at most 3
- days, we might create this user's principal within
- kadmin with: addprinc -maxrenewlife
- 3days. The Zookeeper client and server
- libraries manage their own ticket refreshment by
- running threads that wake up periodically to do the
- refreshment.
-
-
- On each host that will run an HBase client
- (e.g. hbase shell), add the following
- file to the HBase home directory's conf
- directory:
-
-
- Client {
- com.sun.security.auth.module.Krb5LoginModule required
- useKeyTab=false
- useTicketCache=true;
- };
-
-
- We'll refer to this JAAS configuration file as
- $CLIENT_CONF below.
-
-
- HBase-managed Zookeeper Configuration
-
- On each node that will run a zookeeper, a
- master, or a regionserver, create a JAAS
- configuration file in the conf directory of the node's
- HBASE_HOME directory that looks like the
- following:
-
-
- Server {
- com.sun.security.auth.module.Krb5LoginModule required
- useKeyTab=true
- keyTab="$PATH_TO_ZOOKEEPER_KEYTAB"
- storeKey=true
- useTicketCache=false
- principal="zookeeper/$HOST";
- };
- Client {
- com.sun.security.auth.module.Krb5LoginModule required
- useKeyTab=true
- useTicketCache=false
- keyTab="$PATH_TO_HBASE_KEYTAB"
- principal="hbase/$HOST";
- };
-
-
- where the $PATH_TO_HBASE_KEYTAB and
- $PATH_TO_ZOOKEEPER_KEYTAB files are what
- you created above, and $HOST is the hostname for that
- node.
-
- The Server section will be used by
- the Zookeeper quorum server, while the
- Client section will be used by the HBase
- master and regionservers. The path to this file should
- be substituted for the text $HBASE_SERVER_CONF
- in the hbase-env.sh
- listing below.
-
-
- The path to this file should be substituted for the
- text $CLIENT_CONF in the
- hbase-env.sh listing below.
-
-
- Modify your hbase-env.sh to include the
- following:
-
-
- export HBASE_OPTS="-Djava.security.auth.login.config=$CLIENT_CONF"
- export HBASE_MANAGES_ZK=true
- export HBASE_ZOOKEEPER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
- export HBASE_MASTER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
- export HBASE_REGIONSERVER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
-
-
- where $HBASE_SERVER_CONF and
- $CLIENT_CONF are the full paths to the
- JAAS configuration files created above.
-
- Modify your hbase-site.xml on each node
- that will run zookeeper, master or regionserver to contain:
-
-
-
- hbase.zookeeper.quorum
- $ZK_NODES
-
-
- hbase.cluster.distributed
- true
-
-
- hbase.zookeeper.property.authProvider.1
- org.apache.zookeeper.server.auth.SASLAuthenticationProvider
-
-
- hbase.zookeeper.property.kerberos.removeHostFromPrincipal
- true
-
-
- hbase.zookeeper.property.kerberos.removeRealmFromPrincipal
- true
-
-
- ]]>
-
- where $ZK_NODES is the
- comma-separated list of hostnames of the Zookeeper
- Quorum hosts.
-
- Start your hbase cluster by running one or more
- of the following set of commands on the appropriate
- hosts:
-
-
-
- bin/hbase zookeeper start
- bin/hbase master start
- bin/hbase regionserver start
-
-
-
-
- External Zookeeper Configuration
- Add a JAAS configuration file that looks like:
-
-
- Client {
- com.sun.security.auth.module.Krb5LoginModule required
- useKeyTab=true
- useTicketCache=false
- keyTab="$PATH_TO_HBASE_KEYTAB"
- principal="hbase/$HOST";
- };
-
-
- where the $PATH_TO_HBASE_KEYTAB is the keytab
- created above for HBase services to run on this host, and $HOST is the
- hostname for that node. Put this in the HBase home's
- configuration directory. We'll refer to this file's
- full pathname as $HBASE_SERVER_CONF below.
-
- Modify your hbase-env.sh to include the following:
-
-
- export HBASE_OPTS="-Djava.security.auth.login.config=$CLIENT_CONF"
- export HBASE_MANAGES_ZK=false
- export HBASE_MASTER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
- export HBASE_REGIONSERVER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
-
-
-
- Modify your hbase-site.xml on each node
- that will run a master or regionserver to contain:
-
-
-
- hbase.zookeeper.quorum
- $ZK_NODES
-
-
- hbase.cluster.distributed
- true
-
-
- ]]>
-
-
- where $ZK_NODES is the
- comma-separated list of hostnames of the Zookeeper
- Quorum hosts.
-
-
- Add a zoo.cfg for each Zookeeper Quorum host containing:
-
- authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider
- kerberos.removeHostFromPrincipal=true
- kerberos.removeRealmFromPrincipal=true
-
-
- Also on each of these hosts, create a JAAS configuration file containing:
-
-
- Server {
- com.sun.security.auth.module.Krb5LoginModule required
- useKeyTab=true
- keyTab="$PATH_TO_ZOOKEEPER_KEYTAB"
- storeKey=true
- useTicketCache=false
- principal="zookeeper/$HOST";
- };
-
-
- where $HOST is the hostname of each
- Quorum host. We will refer to the full pathname of
- this file as $ZK_SERVER_CONF below.
-
-
-
-
- Start your Zookeepers on each Zookeeper Quorum host with:
-
-
- SERVER_JVMFLAGS="-Djava.security.auth.login.config=$ZK_SERVER_CONF" bin/zkServer start
-
-
-
-
-
- Start your HBase cluster by running one or more of the following set of commands on the appropriate nodes:
-
-
-
- bin/hbase master start
- bin/hbase regionserver start
-
-
-
-
-
-
- Zookeeper Server Authentication Log Output
- If the configuration above is successful,
- you should see something similar to the following in
- your Zookeeper server logs:
-
-11/12/05 22:43:39 INFO zookeeper.Login: successfully logged in.
-11/12/05 22:43:39 INFO server.NIOServerCnxnFactory: binding to port 0.0.0.0/0.0.0.0:2181
-11/12/05 22:43:39 INFO zookeeper.Login: TGT refresh thread started.
-11/12/05 22:43:39 INFO zookeeper.Login: TGT valid starting at: Mon Dec 05 22:43:39 UTC 2011
-11/12/05 22:43:39 INFO zookeeper.Login: TGT expires: Tue Dec 06 22:43:39 UTC 2011
-11/12/05 22:43:39 INFO zookeeper.Login: TGT refresh sleeping until: Tue Dec 06 18:36:42 UTC 2011
-..
-11/12/05 22:43:59 INFO auth.SaslServerCallbackHandler:
- Successfully authenticated client: authenticationID=hbase/ip-10-166-175-249.us-west-1.compute.internal@HADOOP.LOCALDOMAIN;
- authorizationID=hbase/ip-10-166-175-249.us-west-1.compute.internal@HADOOP.LOCALDOMAIN.
-11/12/05 22:43:59 INFO auth.SaslServerCallbackHandler: Setting authorizedID: hbase
-11/12/05 22:43:59 INFO server.ZooKeeperServer: adding SASL authorization for authorizationID: hbase
-
-
-
-
-
-
-
- Zookeeper Client Authentication Log Output
- On the Zookeeper client side (HBase master or regionserver),
- you should see something similar to the following:
-
-
-11/12/05 22:43:59 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=ip-10-166-175-249.us-west-1.compute.internal:2181 sessionTimeout=180000 watcher=master:60000
-11/12/05 22:43:59 INFO zookeeper.ClientCnxn: Opening socket connection to server /10.166.175.249:2181
-11/12/05 22:43:59 INFO zookeeper.RecoverableZooKeeper: The identifier of this process is 14851@ip-10-166-175-249
-11/12/05 22:43:59 INFO zookeeper.Login: successfully logged in.
-11/12/05 22:43:59 INFO client.ZooKeeperSaslClient: Client will use GSSAPI as SASL mechanism.
-11/12/05 22:43:59 INFO zookeeper.Login: TGT refresh thread started.
-11/12/05 22:43:59 INFO zookeeper.ClientCnxn: Socket connection established to ip-10-166-175-249.us-west-1.compute.internal/10.166.175.249:2181, initiating session
-11/12/05 22:43:59 INFO zookeeper.Login: TGT valid starting at: Mon Dec 05 22:43:59 UTC 2011
-11/12/05 22:43:59 INFO zookeeper.Login: TGT expires: Tue Dec 06 22:43:59 UTC 2011
-11/12/05 22:43:59 INFO zookeeper.Login: TGT refresh sleeping until: Tue Dec 06 18:30:37 UTC 2011
-11/12/05 22:43:59 INFO zookeeper.ClientCnxn: Session establishment complete on server ip-10-166-175-249.us-west-1.compute.internal/10.166.175.249:2181, sessionid = 0x134106594320000, negotiated timeout = 180000
-
-
-
-
-
- Configuration from Scratch
-
- This has been tested on the current standard Amazon
- Linux AMI. First setup KDC and principals as
- described above. Next checkout code and run a sanity
- check.
-
-
- git clone git://git.apache.org/hbase.git
- cd hbase
- mvn -Psecurity,localTests clean test -Dtest=TestZooKeeperACL
-
-
- Then configure HBase as described above.
- Manually edit target/cached_classpath.txt (see below)..
-
-
- bin/hbase zookeeper &
- bin/hbase master &
- bin/hbase regionserver &
-
-
-
-
-
- Future improvements
-
- Fix target/cached_classpath.txt
-
- You must override the standard hadoop-core jar file from the
- target/cached_classpath.txt
- file with the version containing the HADOOP-7070 fix. You can use the following script to do this:
-
-
- echo `find ~/.m2 -name "*hadoop-core*7070*SNAPSHOT.jar"` ':' `cat target/cached_classpath.txt` | sed 's/ //g' > target/tmp.txt
- mv target/tmp.txt target/cached_classpath.txt
-
-
-
-
-
-
-
- Set JAAS configuration
- programmatically
-
-
- This would avoid the need for a separate Hadoop jar
- that fixes HADOOP-7070.
-
-
-
- Elimination of
- kerberos.removeHostFromPrincipal and
- kerberos.removeRealmFromPrincipal
-
-
-
-
-
-
-
-
-
-
-
-
+ Configuration Files
-
+
hbase-site.xml and hbase-default.xmlJust as in Hadoop where you add site-specific HDFS configuration
@@ -1197,7 +669,7 @@ ${HBASE_HOME}/bin/hbase-daemons.sh {start,stop} zookeeper
The generated file is a docbook section with a glossary
in it-->
+ href="../../src/main/resources/hbase-default.xml" />
@@ -1242,8 +714,17 @@ ${HBASE_HOME}/bin/hbase-daemons.sh {start,stop} zookeeper
used by tests).
- Minimally, a client of HBase needs the hbase, hadoop, log4j, commons-logging, commons-lang,
- and ZooKeeper jars in its CLASSPATH connecting to a cluster.
+ Minimally, a client of HBase needs several libraries in its CLASSPATH when connecting to a cluster, including:
+
+commons-configuration (commons-configuration-1.6.jar)
+commons-lang (commons-lang-2.5.jar)
+commons-logging (commons-logging-1.1.1.jar)
+hadoop-core (hadoop-core-1.0.0.jar)
+hbase (hbase-0.92.0.jar)
+log4j (log4j-1.2.16.jar)
+slf4j-api (slf4j-api-1.5.8.jar)
+slf4j-log4j (slf4j-log4j12-1.5.8.jar)
+zookeeper (zookeeper-3.4.2.jar)
An example basic hbase-site.xml for client only
@@ -1261,7 +742,7 @@ ${HBASE_HOME}/bin/hbase-daemons.sh {start,stop} zookeeper
]]>
-
+
Java client configurationThe configuration used by a Java client is kept
@@ -1270,15 +751,15 @@ ${HBASE_HOME}/bin/hbase-daemons.sh {start,stop} zookeeper
on invocation, will read in the content of the first hbase-site.xml found on
the client's CLASSPATH, if one is present
(Invocation will also factor in any hbase-default.xml found;
- an hbase-default.xml ships inside the hbase.X.X.X.jar).
+ an hbase-default.xml ships inside the hbase.X.X.X.jar).
It is also possible to specify configuration directly without having to read from a
hbase-site.xml. For example, to set the ZooKeeper
ensemble for the cluster programmatically do as follows:
Configuration config = HBaseConfiguration.create();
-config.set("hbase.zookeeper.quorum", "localhost"); // Here we are running zookeeper locally
+config.set("hbase.zookeeper.quorum", "localhost"); // Here we are running zookeeper locally
If multiple ZooKeeper instances make up your ZooKeeper ensemble,
they may be specified in a comma-separated list (just as in the hbase-site.xml file).
- This populated Configuration instance can then be passed to an
+ This populated Configuration instance can then be passed to an
HTable,
and so on.
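
 For example, here is a minimal sketch of handing such a populated Configuration to an HTable (the quorum hosts, the table name "testtable" and the row key are placeholders for illustration; this assumes the 0.92-era client API used throughout this chapter):

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.util.Bytes;

 public class ClientConfigExample {
   public static void main(String[] args) throws Exception {
     // Reads hbase-default.xml plus any hbase-site.xml found on the CLASSPATH
     Configuration config = HBaseConfiguration.create();
     // Programmatic override of the ZooKeeper ensemble, as described above
     config.set("hbase.zookeeper.quorum", "zk1.example.com,zk2.example.com,zk3.example.com");
     // Hand the populated Configuration to an HTable and issue a Get
     HTable table = new HTable(config, "testtable");
     try {
       Result r = table.get(new Get(Bytes.toBytes("myrow")));
       System.out.println("Fetched row: " + r);
     } finally {
       table.close();
     }
   }
 }
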
@@ -1286,7 +767,7 @@ config.set("hbase.zookeeper.quorum", "localhost"); // Here we are running zooke
-
+
Example Configurations
@@ -1378,7 +859,7 @@ config.set("hbase.zookeeper.quorum", "localhost"); // Here we are running zooke
1G.
-
+
$ git diff hbase-env.sh
diff --git a/conf/hbase-env.sh b/conf/hbase-env.sh
index e70ebc6..96f8c27 100644
@@ -1386,11 +867,11 @@ index e70ebc6..96f8c27 100644
+++ b/conf/hbase-env.sh
@@ -31,7 +31,7 @@ export JAVA_HOME=/usr/lib//jvm/java-6-sun/
# export HBASE_CLASSPATH=
-
+
# The maximum amount of heap to use, in MB. Default is 1000.
-# export HBASE_HEAPSIZE=1000
+export HBASE_HEAPSIZE=4096
-
+
# Extra Java runtime options.
# Below are what we set by default. May only work with SUN JVM.
@@ -1402,8 +883,8 @@ index e70ebc6..96f8c27 100644
-
-
+
+
The Important ConfigurationsBelow we list what the important
@@ -1415,9 +896,23 @@ index e70ebc6..96f8c27 100644
Required ConfigurationsReview the and sections.
+ Big Cluster Configurations
+ On a cluster with a lot of regions, it is possible that if an eager-beaver
+ regionserver checks in soon after master start while all the rest in the
+ cluster are laggardly, this first server to check in will be assigned all
+ regions. With lots of regions, this first server could buckle under the
+ load. To prevent this scenario from happening, raise
+ hbase.master.wait.on.regionservers.mintostart from its
+ default value of 1. See
+ HBASE-6389 Modify the conditions to ensure that Master waits for sufficient number of Region Servers before starting region assignments
+ for more detail.
+
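
 As a hedged sketch only (the value 3 is an arbitrary example, and on a real cluster this property belongs in the master's hbase-site.xml rather than in client code), the property can also be set on a Configuration programmatically, e.g. for an embedded or test master:

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;

 public class MinToStartExample {
   public static void main(String[] args) {
     Configuration conf = HBaseConfiguration.create();
     // Wait for at least 3 regionservers to check in before the master starts
     // assigning regions (the default is 1). On a real cluster this setting
     // belongs in the master's hbase-site.xml.
     conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
     System.out.println("mintostart = "
         + conf.getInt("hbase.master.wait.on.regionservers.mintostart", 1));
   }
 }
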
+ Recommended Configurations
+
+ ZooKeeper Configurationzookeeper.session.timeoutThe default timeout is three minutes (specified in milliseconds). This means
that if a server crashes, it will be three minutes before the Master notices
@@ -1427,7 +922,7 @@ index e70ebc6..96f8c27 100644
configuration under control otherwise, a long garbage collection that lasts
beyond the ZooKeeper session timeout will take out
your RegionServer (You might be fine with this -- you probably want recovery to start
- on the server if a RegionServer has been in GC for a long period of time).
+ on the server if a RegionServer has been in GC for a long period of time).To change this configuration, edit hbase-site.xml,
copy the changed file around the cluster and restart.
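
 A small sketch of reading the effective value from a client-side Configuration and sanity-checking it against an assumed worst-case GC pause (both the 60 second pause budget and the idea of checking it in code are illustrative only):

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;

 public class SessionTimeoutCheck {
   public static void main(String[] args) {
     Configuration conf = HBaseConfiguration.create();
     // Default is 180000 (three minutes); overridden if set in hbase-site.xml
     int timeoutMs = conf.getInt("zookeeper.session.timeout", 180000);
     // Illustrative check: a GC pause longer than the session timeout
     // will cause ZooKeeper to expire the RegionServer's session
     long worstCaseGcPauseMs = 60000; // assumed budget for this sketch
     System.out.println("zookeeper.session.timeout = " + timeoutMs + " ms");
     if (worstCaseGcPauseMs >= timeoutMs) {
       System.out.println("WARNING: expected GC pauses exceed the ZK session timeout");
     }
   }
 }
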
@@ -1443,6 +938,18 @@ index e70ebc6..96f8c27 100644
Number of ZooKeeper InstancesSee .
+
+
+
+ HDFS Configurations
+
+ dfs.datanode.failed.volumes.tolerated
+ This is the "...number of volumes that are allowed to fail before a datanode stops offering service. By default
+ any volume failure will cause a datanode to shutdown" from the hdfs-default.xml
+ description. If you have more than three or four disks, you might want to set this to 1; if you have many disks,
+ set it to two or more.
+
+ hbase.regionserver.handler.count
@@ -1503,7 +1010,7 @@ index e70ebc6..96f8c27 100644
cluster (You can always later manually split the big Regions should one prove
hot and you want to spread the request load over the cluster). A lower number of regions is
preferred, generally in the range of 20 to low-hundreds
- per RegionServer. Adjust the regionsize as appropriate to achieve this number.
+ per RegionServer. Adjust the regionsize as appropriate to achieve this number.
For the 0.90.x codebase, the upper-bound of regionsize is about 4Gb, with a default of 256Mb.
For 0.92.x codebase, due to the HFile v2 change much larger regionsizes can be supported (e.g., 20Gb).
@@ -1511,10 +1018,58 @@ index e70ebc6..96f8c27 100644
You may need to experiment with this setting based on your hardware configuration and application needs.
Adjust hbase.hregion.max.filesize in your hbase-site.xml.
- RegionSize can also be set on a per-table basis via
+ RegionSize can also be set on a per-table basis via
HTableDescriptor.
-
+
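
 A sketch of the per-table route (the table name 'mytable' and the 20Gb figure are placeholders; this follows the traditional disable/modify/enable cycle of the 0.92-era HBaseAdmin API):

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.util.Bytes;

 public class PerTableRegionSize {
   public static void main(String[] args) throws Exception {
     Configuration conf = HBaseConfiguration.create();
     HBaseAdmin admin = new HBaseAdmin(conf);
     byte[] tableName = Bytes.toBytes("mytable");
     // Fetch the current descriptor and bump the per-table max region size to 20Gb
     HTableDescriptor desc = admin.getTableDescriptor(tableName);
     desc.setMaxFileSize(20L * 1024 * 1024 * 1024);
     // Disable, apply the modified descriptor, then re-enable the table
     admin.disableTable(tableName);
     admin.modifyTable(tableName, desc);
     admin.enableTable(tableName);
     admin.close();
   }
 }
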
+ How many regions per RegionServer?
+
+ Typically you want to keep your region count low on HBase for numerous reasons.
+ Usually right around 100 regions per RegionServer has yielded the best results.
+ Here are some of the reasons below for keeping region count low:
+
+
+ MSLAB requires 2mb per memstore (that's 2mb per family per region).
+ 1000 regions that have 2 families each is 3.9GB of heap used, and it's not even storing data yet. NB: the 2MB value is configurable.
+
+ If you fill all the regions at roughly the same rate, the global memory usage forces tiny
+ flushes when you have too many regions, which in turn generates compactions.
+ Rewriting the same data tens of times is the last thing you want.
+ For example, consider filling 1000 regions (with one family) equally, and take a lower bound for global memstore
+ usage of 5GB (the region server would have a big heap).
+ Once usage reaches 5GB it will force flush the biggest region;
+ at that point the regions should almost all have about 5MB of data, so
+ it would flush that amount. 5MB inserted later, it would flush another
+ region that will now have a bit over 5MB of data, and so on.
+ A basic formula for the number of regions to have per region server would
+ look like this:
+ Heap * upper global memstore limit = amount of heap devoted to memstore;
+ then, amount of heap devoted to memstore / (Number of regions per RS * CFs)
+ gives you the rough memstore size per region if everything is being written to.
+ A more accurate formula is
+ Heap * upper global memstore limit = amount of heap devoted to memstore; then,
+ amount of heap devoted to memstore / (Number of actively written regions per RS * CFs).
+ This can allow you a higher region count from the write perspective if you know how many
+ regions you will be writing to at one time (see the worked sketch after this list).
+
+ The master, as it is, is allergic to tons of regions, and will
+ take a lot of time assigning them and moving them around in batches.
+ The reason is that it's heavy on ZK usage, and it's not very async
+ at the moment (could really be improved -- and has been improved a bunch
+ in 0.96 hbase).
+
+
+ In older versions of HBase (pre-v2 hfile, 0.90 and previous), tons of regions
+ on a few RS can cause the store file index to rise, raising heap usage and potentially
+ creating memory pressure or OOME on the RSs
+
+
+
+ Another issue is the effect of the number of regions on mapreduce jobs.
+ Keeping 5 regions per RS would be too low for a job, whereas 1000 will generate too many maps.
+
+
+
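
 Here is the formula above turned into a small worked sketch. The heap size, memstore fraction, flush size and family count are example numbers only; the property names in the comments are the usual hbase-default.xml names of this era:

 public class RegionCountEstimate {
   public static void main(String[] args) {
     // Example numbers only: a 10GB regionserver heap with the default
     // 0.4 upper global memstore limit and a 128MB target flush size
     double heapGb = 10.0;
     double upperGlobalMemstoreLimit = 0.4;   // hbase.regionserver.global.memstore.upperLimit
     double memstoreFlushSizeMb = 128.0;      // hbase.hregion.memstore.flush.size
     int columnFamilies = 1;

     double memstoreHeapMb = heapGb * 1024 * upperGlobalMemstoreLimit;
     // Heap devoted to memstore / (flush size * CFs) = regions that can be
     // actively written to before flushes become tiny
     double activeRegions = memstoreHeapMb / (memstoreFlushSizeMb * columnFamilies);
     System.out.printf("~%.0f MB of heap for memstores -> roughly %.0f actively written regions%n",
         memstoreHeapMb, activeRegions);
   }
 }

 With these example numbers, 10GB * 0.4 = 4096MB of memstore heap divided by a 128MB flush size gives roughly 32 regions that can be written to actively before flushes start to shrink.
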
Managed Splitting
@@ -1567,23 +1122,30 @@ of all regions.
Managed Compactions
- A common administrative technique is to manage major compactions manually, rather than letting
+ A common administrative technique is to manage major compactions manually, rather than letting
HBase do it. By default, HConstants.MAJOR_COMPACTION_PERIOD is one day and major compactions
may kick in when you least desire it - especially on a busy system. To turn off automatic major compactions set
- the value to 0.
+ the value to 0.
It is important to stress that major compactions are absolutely necessary for StoreFile cleanup, the only variant is when
- they occur. They can be administered through the HBase shell, or via
+ they occur. They can be administered through the HBase shell, or via
HBaseAdmin.
For more information about compactions and the compaction file selection process, see
-
+
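
 For example, a cron-driven client that triggers the major compaction itself might look like the following sketch ('mytable' is a placeholder table name; this is equivalent to running major_compact 'mytable' from the HBase shell):

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.client.HBaseAdmin;

 public class ManualMajorCompaction {
   public static void main(String[] args) throws Exception {
     Configuration conf = HBaseConfiguration.create();
     HBaseAdmin admin = new HBaseAdmin(conf);
     // Asynchronously request a major compaction of every region of 'mytable'
     admin.majorCompact("mytable");
     admin.close();
   }
 }
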
+ Speculative Execution
+ Speculative Execution of MapReduce tasks is on by default, and for HBase clusters it is generally advised to turn off
+ Speculative Execution at a system-level unless you need it for a specific case, where it can be configured per-job.
+ Set the properties mapred.map.tasks.speculative.execution and
+ mapred.reduce.tasks.speculative.execution to false.
+
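
 A sketch of the per-job variant from a MapReduce driver (the job name is a placeholder and the mapper/reducer setup is elided):

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.mapreduce.Job;

 public class NoSpeculationJob {
   public static void main(String[] args) throws Exception {
     Configuration conf = HBaseConfiguration.create();
     // Per-job override: disable speculative execution for map and reduce tasks
     // before the Job copies the Configuration
     conf.setBoolean("mapred.map.tasks.speculative.execution", false);
     conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
     Job job = new Job(conf, "no-speculation-example");
     // ... set mapper/reducer, input and output formats here ...
     System.out.println("speculative execution disabled for job " + job.getJobName());
   }
 }
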
+ Other ConfigurationsBalancer
- The balancer is periodic operation run on the master to redistribute regions on the cluster. It is configured via
+ The balancer is a periodic operation which is run on the master to redistribute regions on the cluster. It is configured via
hbase.balancer.period and defaults to 300000 (5 minutes). See for more information on the LoadBalancer.
@@ -1596,38 +1158,18 @@ of all regions.
on the size you need by surveying regionserver UIs; you'll see index block size accounted near the
top of the webpage).
-
-
-
-
-
- Bloom Filter Configuration
-
- io.hfile.bloom.enabled global kill
- switch
-
- io.hfile.bloom.enabled in
- Configuration serves as the kill switch in case
- something goes wrong. Default = true.
-
-
-
- io.hfile.bloom.error.rate
+
+ Nagle's or the small package problem
+ If a big 40ms or so occasional delay is seen in operations against HBase,
+ try the Nagle's setting. For example, see the user mailing list thread,
+ Inconsistent scan performance with caching set to 1
+ and the issue cited therein where setting tcpnodelay improved scan speeds. You might also
+ see the graphs on the tail of HBASE-7008 Set scanner caching to a better default
+ where our Lars Hofhansl tries various data sizes w/ Nagle's on and off measuring the effect.
+
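
 If you want to experiment with this from a client, the setting below is a sketch; the hbase.ipc.client.tcpnodelay property name is an assumption on our part about the client RPC knob involved, so verify it against your HBase version before relying on it:

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;

 public class TcpNoDelayExample {
   public static void main(String[] args) {
     Configuration conf = HBaseConfiguration.create();
     // Assumed property name: disables Nagle's algorithm on client RPC sockets
     conf.setBoolean("hbase.ipc.client.tcpnodelay", true);
     System.out.println("tcpnodelay = " + conf.getBoolean("hbase.ipc.client.tcpnodelay", false));
   }
 }
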
- io.hfile.bloom.error.rate = average false
- positive rate. Default = 1%. Decrease rate by ½ (e.g. to .5%) == +1
- bit per bloom entry.
-
+
-
- io.hfile.bloom.max.fold
+
- io.hfile.bloom.max.fold = guaranteed minimum
- fold rate. Most people should leave this alone. Default = 7, or can
- collapse to at least 1/128th of original size. See the
- Development Process section of the document BloomFilters
- in HBase for more on what this option means.
-
-
diff --git a/src/docbkx/customization.xsl b/src/docbkx/customization.xsl
index d80a2b5abd61..a5065a48ff93 100644
--- a/src/docbkx/customization.xsl
+++ b/src/docbkx/customization.xsl
@@ -20,15 +20,29 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-This stylesheet is used making an html version of hbase-default.xml.
-->
+
+
+
+
+comments powered by Disqus
diff --git a/src/docbkx/developer.xml b/src/docbkx/developer.xml
index 0c139b843ed0..854b6f6038fe 100644
--- a/src/docbkx/developer.xml
+++ b/src/docbkx/developer.xml
@@ -26,126 +26,266 @@
* limitations under the License.
*/
-->
- Building and Developing HBase
- This chapter will be of interest only to those building and developing HBase (i.e., as opposed to
+ Building and Developing Apache HBase (TM)
+ This chapter will be of interest only to those building and developing Apache HBase (TM) (i.e., as opposed to
just downloading the latest distribution).
- HBase Repositories
+ Apache HBase Repositories
+ There are two different repositories for Apache HBase: Subversion (SVN) and Git. The former is the system of record for committers, but the latter is easier to work with to build and contribute. SVN updates get automatically propagated to the Git repo.SVN
-svn co http://svn.apache.org/repos/asf/hbase/trunk hbase-core-trunk
+svn co http://svn.apache.org/repos/asf/hbase/trunk hbase-core-trunk
-
+ Git
git clone git://git.apache.org/hbase.git
-
-
-
-
+
+
+
+ IDEsEclipseCode Formatting
- See HBASE-3678 Add Eclipse-based Apache Formatter to HBase Wiki
- for an Eclipse formatter to help ensure your code conforms to HBase'y coding convention.
- The issue includes instructions for loading the attached formatter.
+ Under the dev-support folder, you will find hbase_eclipse_formatter.xml.
+ We encourage you to have this formatter in place in eclipse when editing HBase code. To load it into eclipse:
+
+Go to Eclipse->Preferences...
+In Preferences, Go to Java->Code Style->Formatter
+Import... hbase_eclipse_formatter.xml
+Click Apply
+Still in Preferences, Go to Java->Editor->Save Actions
+Check the following:
+
+Perform the selected actions on save
+Format source code
+Format edited lines
+
+
+Click Apply
+
+
+ In addition to the automatic formatting, make sure you follow the style guidelines explained in Also, no @author tags - that's a rule. Quality Javadoc comments are appreciated. And include the Apache license.
-
+ Subversive PluginDownload and install the Subversive plugin.Set up an SVN Repository target from , then check out the code.
-
+
+
+ Git Plugin
+ If you cloned the project via git, download and install the Git plugin (EGit). Attach to your local git repo (via the Git Repositories window) and you'll be able to see file revision history, generate patches, etc.
+
- HBase Project Setup
- To set up your Eclipse environment for HBase, close Eclipse and execute...
-
-mvn eclipse:eclipse
-
- ... from your local HBase project directory in your workspace to generate some new .project
- and .classpathfiles. Then reopen Eclipse.
-
-
- Maven Plugin
- Download and install the Maven plugin. For example, Help -> Install New Software -> (search for Maven Plugin)
-
+ HBase Project Setup in Eclipse
+ The easiest way is to use the m2eclipse plugin for Eclipse. Eclipse Indigo or newer has m2eclipse built-in, or it can be found here:http://www.eclipse.org/m2e/. M2Eclipse provides Maven integration for Eclipse - it even lets you use the direct Maven commands from within Eclipse to compile and test your project.
+ To import the project, you merely need to go to File->Import...Maven->Existing Maven Projects and then point Eclipse at the HBase root directory; m2eclipse will automatically find all the hbase modules for you.
+ If you install m2eclipse and import HBase in your workspace, you will have to fix your eclipse Build Path.
+ Remove target folder, add target/generated-jamon
+ and target/generated-sources/java folders. You may also remove from your Build Path
+ the exclusions on the src/main/resources and src/test/resources
+ to avoid error message in the console 'Failed to execute goal org.apache.maven.plugins:maven-antrun-plugin:1.6:run (default) on project hbase:
+ 'An Ant BuildException has occured: Replace: source file .../target/classes/hbase-default.xml doesn't exist'. This will also
+ reduce the eclipse build cycles and make your life easier when developing.
+
+
+ Import into eclipse with the command line
+ For those not inclined to use m2eclipse, you can generate the Eclipse files from the command line. First, run (you should only have to do this once):
+ mvn clean install -DskipTests
+ and then close Eclipse and execute...
+ mvn eclipse:eclipse
+ ... from your local HBase project directory in your workspace to generate some new .project
+ and .classpath files. Then reopen Eclipse, or refresh your eclipse project (F5), and import
+ the .project file in the HBase directory to a workspace.
+
+ Maven Classpath Variable
- The M2_REPO classpath variable needs to be set up for the project. This needs to be set to
+ The M2_REPO classpath variable needs to be set up for the project. This needs to be set to
your local Maven repository, which is usually ~/.m2/repository
If this classpath variable is not configured, you will see compile errors in Eclipse like this...
Description Resource Path Location Type
-The project cannot be built until build path errors are resolved hbase Unknown Java Problem
+The project cannot be built until build path errors are resolved hbase Unknown Java Problem
Unbound classpath variable: 'M2_REPO/asm/asm/3.1/asm-3.1.jar' in project 'hbase' hbase Build path Build Path Problem
-Unbound classpath variable: 'M2_REPO/com/github/stephenc/high-scale-lib/high-scale-lib/1.1.1/high-scale-lib-1.1.1.jar' in project 'hbase' hbase Build path Build Path Problem
+Unbound classpath variable: 'M2_REPO/com/github/stephenc/high-scale-lib/high-scale-lib/1.1.1/high-scale-lib-1.1.1.jar' in project 'hbase' hbase Build path Build Path Problem
Unbound classpath variable: 'M2_REPO/com/google/guava/guava/r09/guava-r09.jar' in project 'hbase' hbase Build path Build Path Problem
Unbound classpath variable: 'M2_REPO/com/google/protobuf/protobuf-java/2.3.0/protobuf-java-2.3.0.jar' in project 'hbase' hbase Build path Build Path Problem Unbound classpath variable:
-
+
-
- Import via m2eclipse
- If you install the m2eclipse and import the HBase pom.xml in your workspace, you will have to fix your eclipse Build Path.
- Remove target folder, add target/generated-jamon
- and target/generated-sources/java folders. You may also remove from your Build Path
- the exclusions on the src/main/resources and src/test/resources
- to avoid error message in the console 'Failed to execute goal org.apache.maven.plugins:maven-antrun-plugin:1.6:run (default) on project hbase:
- 'An Ant BuildException has occured: Replace: source file .../target/classes/hbase-default.xml doesn't exist'. This will also
- reduce the eclipse build cycles and make your life easier when developing.
- Eclipse Known IssuesEclipse will currently complain about Bytes.java. It is not possible to turn these errors off.
-
+
Description Resource Path Location Type
Access restriction: The method arrayBaseOffset(Class) from the type Unsafe is not accessible due to restriction on required library /System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Classes/classes.jar Bytes.java /hbase/src/main/java/org/apache/hadoop/hbase/util line 1061 Java Problem
Access restriction: The method arrayIndexScale(Class) from the type Unsafe is not accessible due to restriction on required library /System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Classes/classes.jar Bytes.java /hbase/src/main/java/org/apache/hadoop/hbase/util line 1064 Java Problem
Access restriction: The method getLong(Object, long) from the type Unsafe is not accessible due to restriction on required library /System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Classes/classes.jar Bytes.java /hbase/src/main/java/org/apache/hadoop/hbase/util line 1111 Java Problem
-
+ Eclipse - More Information
- For additional information on setting up Eclipse for HBase development on Windows, see
+ For additional information on setting up Eclipse for HBase development on Windows, see
Michael Morello's blog on the topic.
-
-
+
+
- Building HBase
- This section will be of interest only to those building HBase from source.
-
+ Building Apache HBase
+
+ Basic Compile
+ Thanks to maven, building HBase is pretty easy. You can read about the various maven commands in , but the simplest command to compile HBase from its java source code is:
+
+mvn package -DskipTests
+
+ Or, to clean up before compiling:
+
+mvn clean package -DskipTests
+
+ With Eclipse set up as explained above in , you can also simply use the build command in Eclipse. To create the full installable HBase package takes a little bit more work, so read on.
+
+ Building in snappy compression supportPass -Dsnappy to trigger the snappy maven profile for building
- snappy native libs into hbase.
+ snappy native libs into hbase. See also Building the HBase tarballDo the following to build the HBase tarball.
- Passing the -Drelease will generate javadoc and run the RAT plugin to verify licenses on source.
- % MAVEN_OPTS="-Xmx2g" mvn clean site install assembly:single -Dmaven.test.skip -Prelease
+ Passing the -Prelease will generate javadoc and run the RAT plugin to verify licenses on source.
+ % MAVEN_OPTS="-Xmx2g" mvn clean site install assembly:assembly -DskipTests -Prelease
+
+ Build Gotchas
+ If you see Unable to find resource 'VM_global_library.vm', ignore it.
+ It's not an error. It is officially ugly though.
+
+
+
- Adding an HBase release to Apache's Maven Repository
+ Adding an Apache HBase release to Apache's Maven RepositoryFollow the instructions at
- Publishing Maven Artifacts.
- The 'trick' to making it all work is answering the questions put to you by the mvn release plugin properly,
- making sure it is using the actual branch AND before doing the mvn release:perform step,
- VERY IMPORTANT, hand edit the release.properties file that was put under ${HBASE_HOME}
- by the previous step, release:perform. You need to edit it to make it point at
- right locations in SVN.
+ Publishing Maven Artifacts after
+ reading the below miscellany.
+
+ You must use maven 3.0.x (Check by running mvn -version).
+
+ Let me list out the commands I used first. The sections that follow dig in more
+ on what is going on. In this example, we are releasing the 0.92.2 jar to the apache
+ maven repository.
+
+ # First make a copy of the tag we want to release; presumes the release has been tagged already
+ # We do this because we need to make some commits for the mvn release plugin to work.
+ 853 svn copy -m "Publishing 0.92.2 to mvn" https://svn.apache.org/repos/asf/hbase/tags/0.92.2 https://svn.apache.org/repos/asf/hbase/tags/0.92.2mvn
+ 857 svn checkout https://svn.apache.org/repos/asf/hbase/tags/0.92.2mvn
+ 858 cd 0.92.2mvn/
+ # Edit the version making it release version with a '-SNAPSHOT' suffix (See below for more on this)
+ 860 vi pom.xml
+ 861 svn commit -m "Add SNAPSHOT to the version" pom.xml
+ 862 ~/bin/mvn/bin/mvn release:clean
+ 865 ~/bin/mvn/bin/mvn release:prepare
+ 866 # Answer questions and then ^C to kill the build after the last question. See below for more on this.
+ 867 vi release.properties
+ # Change the references to trunk svn to be 0.92.2mvn; the release plugin presumes trunk
+ # Then restart the release:prepare -- it won't ask questions
+ # because the properties file exists.
+ 868 ~/bin/mvn/bin/mvn release:prepare
+ # The apache-release profile comes from the apache parent pom and does signing of artifacts published
+ 869 ~/bin/mvn/bin/mvn release:perform -Papache-release
+ # When done copying up to apache staging repository,
+ # browse to repository.apache.org, login and finish
+ # the release as according to the above
+ # "Publishing Maven Artifacts.
+
+
+ Below is more detail on the commands listed above.
+ At the mvn release:perform step, before starting, if you are for example
+ releasing hbase 0.92.2, you need to make sure the pom.xml version is 0.92.2-SNAPSHOT. This needs
+ to be checked in. Since we do the maven release after actual release, I've been doing this
+ checkin into a copy of the release tag rather than into the actual release tag itself (presumes the release has been properly tagged in svn).
+ So, say we released hbase 0.92.2 and now we want to do the release to the maven repository, in svn, the 0.92.2
+ release will be tagged 0.92.2. Making the maven release, copy the 0.92.2 tag to 0.92.2mvn.
+ Check out this tag and change the version therein and commit.
+
+
+ Currently, the mvn release wants to go against trunk. I haven't figured how to tell it to do otherwise
+ so I do the below hack. The hack comprises answering the questions put to you by the mvn release plugin properly,
+ then immediately control-C'ing the build after the last question asked as the build release step starts to run.
+ After control-C'ing it, you'll notice a release.properties in your build dir. Review it.
+ Make sure it is using the proper branch -- it tends to use trunk rather than the 0.92.2mvn or whatever
+ that you want it to use -- so hand edit the release.properties file that was put under ${HBASE_HOME}
+ by the release:perform invocation. When done, restart the
+ release:perform.
+ Here is how I'd answer the questions at release:prepare time:
+ What is the release version for "HBase"? (org.apache.hbase:hbase) 0.92.2: :
+What is SCM release tag or label for "HBase"? (org.apache.hbase:hbase) hbase-0.92.2: : 0.92.2mvn
+What is the new development version for "HBase"? (org.apache.hbase:hbase) 0.92.3-SNAPSHOT: :
+[INFO] Transforming 'HBase'...
+
+ When you run release:perform, pass -Papache-release
+ else it will not 'sign' the artifacts it uploads.
+
+ A strange issue I ran into was the one where the upload into the apache
+ repository was being sprayed across multiple apache machines making it so I could
+ not release. See INFRA-4482 Why is my upload to mvn spread across multiple repositories?.
+
+ Here is my ~/.m2/settings.xml.
+ This is read by the release plugin. The apache-release profile will pick up your
+ gpg key setup from here if you've specified it into the file. The password
+ can be maven encrypted as suggested in the "Publishing Maven Artifacts" but plain
+ text password works too (just don't let anyone see your local settings.xml).
+ <settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0
+ http://maven.apache.org/xsd/settings-1.0.0.xsd">
+ <servers>
+ <!-- To publish a snapshot of some part of Maven -->
+ <server>
+ <id>apache.snapshots.https</id>
+ <username>YOUR_APACHE_ID
+ </username>
+ <password>YOUR_APACHE_PASSWORD
+ </password>
+ </server>
+ <!-- To publish a website using Maven -->
+ <!-- To stage a release of some part of Maven -->
+ <server>
+ <id>apache.releases.https</id>
+ <username>YOUR_APACHE_ID
+ </username>
+ <password>YOUR_APACHE_PASSWORD
+ </password>
+ </server>
+ </servers>
+ <profiles>
+ <profile>
+ <id>apache-release</id>
+ <properties>
+ <gpg.keyname>YOUR_KEYNAME</gpg.keyname>
+ <!--Keyname is something like this ... 00A5F21E... do gpg --list-keys to find it-->
+ <gpg.passphrase>YOUR_KEY_PASSWORD
+ </gpg.passphrase>
+ </properties>
+ </profile>
+ </profiles>
+</settings>
+
+
+
If you run into the below, it's because you need to edit the version in the pom.xml and add
-SNAPSHOT to the version (and commit).
[INFO] Scanning for projects...
@@ -168,73 +308,163 @@ Access restriction: The method getLong(Object, long) from the type Unsafe is not
[INFO] -----------------------------------------------------------------------
- Build Gotchas
- If you see Unable to find resource 'VM_global_library.vm', ignore it.
- Its not an error. It is officially ugly though.
-
+
+ Generating the HBase Reference Guide
+ The manual is marked up using docbook.
+ We then use the docbkx maven plugin
+ to transform the markup to html. This plugin is run when you specify the site
+ goal as in when you run mvn site or you can call the plugin explicitly to
+ just generate the manual by doing mvn docbkx:generate-html
+ (TODO: It looks like you have to run mvn site first because docbkx wants to
+ include a transformed hbase-default.xml. Fix).
+ When you run mvn site, we do the document generation twice, once to generate the multipage
+ manual and then again for the single page manual (the single page version is easier to search).
+
-
-
+
+ Updating hbase.apache.org
+
+ Contributing to hbase.apache.org
+ The Apache HBase web site (including this reference guide) is maintained as part of the main Apache HBase source tree, under /src/docbkx and /src/site. The former is this reference guide; the latter, in most cases, are legacy pages that are in the process of being merged into the docbkx tree.
+ To contribute to the reference guide, edit these files and submit them as a patch (see ). Your Jira should contain a summary of the changes in each section (see HBASE-6081 for an example).
+ To generate the site locally while you're working on it, run:
+ mvn site
+ Then you can load up the generated HTML files in your browser (files are under /target/site).
+
+
+ Publishing hbase.apache.org
+ As of INFRA-5680 Migrate apache hbase website,
+ to publish the website, build it, and then deploy it over a checkout of https://svn.apache.org/repos/asf/hbase/hbase.apache.org/trunk,
+ and then check it in. For example, if trunk is checked out at /Users/stack/checkouts/trunk
+ and hbase.apache.org is checked out at /Users/stack/checkouts/hbase.apache.org/trunk, to update
+ the site, do the following:
+
+ # Build the site and deploy it to the checked out directory
+ # Getting the javadoc into site is a little tricky. You have to build it independent, then
+ # 'aggregate' it at top-level so the pre-site site lifecycle step can find it; that is
+ # what the javadoc:javadoc and javadoc:aggregate is about.
+ $ MAVEN_OPTS=" -Xmx3g" mvn clean -DskipTests javadoc:javadoc javadoc:aggregate site site:stage -DstagingDirectory=/Users/stack/checkouts/hbase.apache.org/trunk
+ # Check the deployed site by viewing it in a browser.
+ # If all is good, commit it and it will show up at http://hbase.apache.org
+ #
+ $ cd /Users/stack/checkouts/hbase.apache.org/trunk
+ $ svn commit -m 'Committing latest version of website...'
+
+
+
+ Tests
-HBase tests are divided into two groups: and
-.
-Unit tests are run by the Apache Continuous Integration server and by developers
-when they are verifying a fix does not cause breakage elsewhere in the code base.
-Integration tests are generally long-running tests that are invoked out-of-bound of
-the CI server when you want to do more intensive testing beyond the unit test set.
-Integration tests, for example, are run proving a release candidate or a production
-deploy. Below we go into more detail on each of these test types. Developers at a
-minimum should familiarize themselves with the unit test detail; unit tests in
-HBase have a character not usually seen in other projects.
+ Developers, at a minimum, should familiarize themselves with the unit test detail; unit tests in
+HBase have a character not usually seen in other projects.
+
+
+Apache HBase Modules
+As of 0.96, Apache HBase is split into multiple modules which creates "interesting" rules for
+how and where tests are written. If you are writing code for hbase-server, see
+ for how to write your tests; these tests can spin
+up a minicluster and will need to be categorized. For any other module, for example
+hbase-common, the tests must be strict unit tests and just test the class
+under test - no use of the HBaseTestingUtility or minicluster is allowed (or even possible
+given the dependency tree).
+
+ Running Tests in other Modules
+ If the module you are developing in has no other dependencies on other HBase modules, then
+ you can cd into that module and just run:
+ mvn test
+ which will just run the tests IN THAT MODULE. If there are other dependencies on other modules,
+ then you will have to run the command from the ROOT HBASE DIRECTORY. This will run the tests in the other
+ modules, unless you specify to skip the tests in that module. For instance, to skip the tests in the hbase-server module,
+ you would run:
+ mvn clean test -PskipServerTests
+ from the top level directory to run all the tests in modules other than hbase-server. Note that you
+ can specify to skip tests in multiple modules as well as just for a single module. For example, to skip
+ the tests in hbase-server and hbase-common, you would run:
+ mvn clean test -PskipServerTests -PskipCommonTests
+ Also, keep in mind that if you are running tests in the hbase-server module you will need to
+ apply the maven profiles discussed in to get the tests to run properly.
+
+Unit Tests
-HBase unit tests are subdivided into three categories: small, medium and large, with
-corresponding JUnit categories:
+Apache HBase unit tests are subdivided into four categories: small, medium, large, and
+integration with corresponding JUnit categories:
SmallTests, MediumTests,
-LargeTests. JUnit categories are denoted using java annotations
-and look like this in your unit test code.
+LargeTests, IntegrationTests.
+JUnit categories are denoted using java annotations and look like this in your unit test code.
...
@Category(SmallTests.class)
public class TestHRegionInfo {
-
@Test
public void testCreateHRegionInfoName() throws Exception {
// ...
}
}
-The above example shows how to mark a test as belonging to the small category.
+The above example shows how to mark a unit test as belonging to the small category.
+All unit tests in HBase have a categorization.
+The first three categories, small, medium, and large are for tests run when
+you type $ mvn test; i.e. these three categorizations are for
+HBase unit tests. The integration category is not for unit tests but for integration
+tests. These are run when you invoke $ mvn verify. Integration tests
+are described in integration tests section and will not be discussed further
+in this section on HBase unit tests.
+
+Apache HBase uses a patched maven surefire plugin and maven profiles to implement
+its unit test characterizations.
+
+Read the below to figure out which annotation of the set small, medium, and large to
+put on your new HBase unit test.
+
+
+
+Small TestsSmallTests
+Small tests are executed in a shared JVM. We put in this category all the tests that can
-be executed quickly in a shared JVM. The maximum execution time for a test is 15 seconds,
-and they do not use a cluster. Medium tests represent tests that must be executed
+be executed quickly in a shared JVM. The maximum execution time for a small test is 15 seconds,
+and small tests should not use a (mini)cluster.
+
+
+
+Medium TestsMediumTests
+Medium tests represent tests that must be executed
before proposing a patch. They are designed to run in less than 30 minutes altogether,
and are quite stable in their results. They are designed to last less than 50 seconds
individually. They can use a cluster, and each of them is executed in a separate JVM.
-Large tests are everything else. They are typically integration-like
-tests (yes, some large tests should be moved out to be HBase ),
-regression tests for specific bugs, timeout tests, performance tests.
+
+
+
+
+Large TestsLargeTests
+Large tests are everything else. They are typically large-scale
+tests, regression tests for specific bugs, timeout tests, performance tests.
They are executed before a commit on the pre-integration machines. They can be run on
the developer machine as well.
-HBase uses a patched maven surefire plugin and maven profiles to implement its
-unit test characterizations.
+
+
+Integration TestsIntegrationTests
+Integration tests are system level tests. See
+integration tests section for more info.
+
+
+Running tests
-Below we describe how to run the HBase junit categories.
+Below we describe how to run the Apache HBase junit categories.Default: small and medium category tests
-Running mvn test will execute all small tests in a single JVM and medium tests in a separate JVM for
-each test instance. Medium tests are NOT executed if there is an error in a small test.
+Running mvn test will execute all small tests in a single JVM
+(no fork) and then medium tests in a separate JVM for each test instance.
+Medium tests are NOT executed if there is an error in a small test.
Large tests are NOT executed. There is one report for small tests, and one report for
-medium tests if they are executed. To run small and medium tests with the security
-profile enabled, do mvn test -P security
+medium tests if they are executed.
@@ -244,42 +474,69 @@ profile enabled, do mvn test -P security
will execute small tests in a single JVM then medium and large tests in a separate JVM for each test.
Medium and large tests are NOT executed if there is an error in a small test.
Large tests are NOT executed if there is an error in a small or medium test.
-There is one report for small tests, and one report for medium and large tests if they are executed
+There is one report for small tests, and one report for medium and large tests if they are executed.
Running a single test or all tests in a packageTo run an individual test, e.g. MyTest, do
-mvn test -P localTests -Dtest=MyTest You can also
+mvn test -Dtest=MyTest You can also
pass multiple, individual tests as a comma-delimited list:
-mvn test -P localTests -Dtest=MyTest1,MyTest2,MyTest3
+mvn test -Dtest=MyTest1,MyTest2,MyTest3
You can also pass a package, which will run all tests under the package:
-mvn test -P localTests -Dtest=org.apache.hadoop.hbase.client.*
-To run a single test with the security profile enabled:
-mvn test -P security,localTests -Dtest=TestGet
+mvn test -Dtest=org.apache.hadoop.hbase.client.*
-The -P localTests will remove the JUnit category effect (without this specific profile,
-the profiles are taken into account). It will actually use the official release of surefire
-and the old connector (The HBase build uses a patched version of the maven surefire plugin).
-junit tests are executed in separated JVM. You will see a new message at the end of the
-report: "[INFO] Tests are skipped". It's harmless.
+When -Dtest is specified, localTests profile will be used. It will use the official release
+of maven surefire, rather than our custom surefire plugin, and the old connector (The HBase build uses a patched
+version of the maven surefire plugin). Each junit test is executed in a separate JVM (A fork per test class).
+There is no parallelization when tests are running in this mode. You will see a new message at the end of the
+report: "[INFO] Tests are skipped". It's harmless. However, you need to make sure the sum of Tests run: in
+the Results: section of the test reports matches the number of tests you specified, because no
+error will be reported when a non-existent test case is specified.
Other test invocation permutations
-Running mvn test -P runSmallTests will execute small tests only, in a single JVM.
+Running mvn test -P runSmallTests will execute "small" tests only, using a single JVM.
+
+Running mvn test -P runMediumTests will execute "medium" tests only, launching a new JVM for each test-class.
+
+Running mvn test -P runLargeTests will execute "large" tests only, launching a new JVM for each test-class.
-Running mvn test -P runMediumTests will execute medium tests in a single JVM.
+For convenience, you can run mvn test -P runDevTests to execute both small and medium tests, using a single JVM.
-Running mvn test -P runLargeTests execute medium tests in a single JVM.
+
+
+
+Running tests faster
+
+By default, $ mvn test -P runAllTests runs 5 tests in parallel.
+It can be increased on a developer's machine. Allowing that you can have 2
+tests in parallel per core, and you need about 2Gb of memory per test (at the
+extreme), if you have an 8 core, 24Gb box, you could have 16 tests in parallel,
+but the available memory limits it to 12 (24/2). To run all tests with 12 tests
+in parallel, do this:
+mvn test -P runAllTests -Dsurefire.secondPartThreadCount=12.
+To increase the speed, you can also use a ramdisk. You will need 2Gb of memory
+to run all tests. You will also need to delete the files between two test runs.
+The typical way to configure a ramdisk on Linux is:
+$ sudo mkdir /ram2G
+sudo mount -t tmpfs -o size=2048M tmpfs /ram2G
+You can then use it to run all HBase tests with the command:
+mvn test -P runAllTests -Dsurefire.secondPartThreadCount=12 -Dtest.build.data.basedirectory=/ram2G
+
+
+hbasetests.shIt's also possible to use the script hbasetests.sh. This script runs the medium and
-large tests in parallel with two maven instances, and provide a single report.
+large tests in parallel with two maven instances, and provides a single report. This script does not use
+the hbase version of surefire so no parallelization is being done other than the two maven instances the
+script sets up.
It must be executed from the directory which contains the pom.xml.For example running
./dev-support/hbasetests.sh will execute small and medium tests.
@@ -288,6 +545,26 @@ Running ./dev-support/hbasetests.sh replayFailed
+
+Test Resource CheckerTest Resource Checker
+
+A custom Maven SureFire plugin listener checks a number of resources before
+and after each HBase unit test runs and logs its findings at the end of the test
+output files which can be found in target/surefire-reports
+per Maven module (Tests write test reports named for the test class into this directory.
+Check the *-out.txt files). The resources counted are the number
+of threads, the number of file descriptors, etc. If the number has increased, it adds
+a LEAK? comment in the logs. As you can have an HBase instance
+running in the background, some threads can be deleted/created without any specific
+action in the test. However, if the test does not work as expected, or if the test
+should not impact these resources, it's worth checking these log lines
+...hbase.ResourceChecker(157): before... and
+...hbase.ResourceChecker(157): after.... For example:
+
+2012-09-26 09:22:15,315 INFO [pool-1-thread-1] hbase.ResourceChecker(157): after: regionserver.TestColumnSeeking#testReseeking Thread=65 (was 65), OpenFileDescriptor=107 (was 107), MaxFileDescriptor=10240 (was 10240), ConnectionCount=1 (was 1)
+
+
+
@@ -307,8 +584,12 @@ Tests should not overlog. More than 100 lines/second makes the logs complex to r
Tests can be written with HBaseTestingUtility.
This class offers helper functions to create a temp directory and do the cleanup, or to start a cluster.
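
A minimal sketch of a categorized test that uses HBaseTestingUtility to spin up a mini cluster (the class, table and column names are made up for illustration; the MediumTests category class is assumed to be the org.apache.hadoop.hbase one shown earlier in this chapter):

import static org.junit.Assert.assertTrue;

import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category(MediumTests.class)
public class TestExampleMiniCluster {
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster();          // spins up HDFS, ZK and HBase in-process
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  @Test
  public void testPutThenGet() throws Exception {
    HTable table = TEST_UTIL.createTable(Bytes.toBytes("t"), Bytes.toBytes("f"));
    Put put = new Put(Bytes.toBytes("row"));
    put.add(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("v"));
    table.put(put);
    assertTrue(table.get(new Get(Bytes.toBytes("row"))).containsColumn(
        Bytes.toBytes("f"), Bytes.toBytes("q")));
  }
}
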
-Categories and execution time
+
+
+
+Categories and execution time
+
All tests must be categorized, if not they could be skipped.
@@ -345,30 +626,60 @@ As most as possible, tests should use the default settings for the cluster. When
-
Integration Tests
-HBase integration Tests are tests that are beyond HBase unit tests. They
+HBase integration/system tests are tests that are beyond HBase unit tests. They
are generally long-lasting, sizeable (the test can be asked to 1M rows or 1B rows),
targetable (they can take configuration that will point them at the ready-made cluster
they are to run against; integration tests do not include cluster start/stop code),
and verifying success, integration tests rely on public APIs only; they do not
-attempt to examine server internals asserring success/fail. Integration tests
+attempt to examine server internals asserting success/fail. Integration tests
are what you would run when you need to more elaborate proofing of a release candidate
beyond what unit tests can do. They are not generally run on the Apache Continuous Integration
-build server.
+build server; however, some sites opt to run integration tests as a part of their
+continuous testing on an actual cluster.
-Integration tests currently live under the src/test directory and
-will match the regex: **/IntegrationTest*.java.
+Integration tests currently live under the src/test directory
+in the hbase-it submodule and will match the regex: **/IntegrationTest*.java.
+All integration tests are also annotated with @Category(IntegrationTests.class).
+
+
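
A bare-bones sketch of such a class (the class name and test body are placeholders; the IntegrationTests category is assumed to live in org.apache.hadoop.hbase as the other categories do, and real tests belong under hbase-it/src/test with the IntegrationTest* naming convention):

import org.apache.hadoop.hbase.IntegrationTests;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category(IntegrationTests.class)
public class IntegrationTestExampleSkeleton {
  @Test
  public void testSomethingAgainstTheCluster() throws Exception {
    // Real integration tests use IntegrationTestingUtility / HBaseCluster and
    // public client APIs only, as described below; the body here is a placeholder.
  }
}
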
+Integration tests can be run in two modes: using a mini cluster, or against an actual distributed cluster.
+Maven failsafe is used to run the tests using the mini cluster. IntegrationTestsDriver class is used for
+executing the tests against a distributed cluster. Integration tests SHOULD NOT assume that they are running against a
+mini cluster, and SHOULD NOT use private API's to access cluster state. To interact with the distributed or mini
+cluster uniformly, IntegrationTestingUtility, and HBaseCluster classes,
+and public client API's can be used.
+
+
+
+On a distributed cluster, integration tests that use ChaosMonkey or otherwise manipulate services thru cluster manager (e.g. restart regionservers) use SSH to do it.
+To run these, test process should be able to run commands on remote end, so ssh should be configured accordingly (for example, if HBase runs under hbase
+user in your cluster, you can set up passwordless ssh for that user and run the test also under it). To facilitate that, hbase.it.clustermanager.ssh.user,
+hbase.it.clustermanager.ssh.opts and hbase.it.clustermanager.ssh.cmd configuration settings can be used. "User" is the remote user that cluster manager should use to perform ssh commands.
+"Opts" contains additional options that are passed to SSH (for example, "-i /tmp/my-key").
+Finally, if you have some custom environment setup, "cmd" is the override format for the entire tunnel (ssh) command. The default string is {/usr/bin/ssh %1$s %2$s%3$s%4$s "%5$s"} and is a good starting point. This is a standard Java format string with 5 arguments that is used to execute the remote command. The argument 1 (%1$s) is SSH options set the via opts setting or via environment variable, 2 is SSH user name, 3 is "@" if username is set or "" otherwise, 4 is the target host name, and 5 is the logical command to execute (that may include single quotes, so don't use them). For example, if you run the tests under non-hbase user and want to ssh as that user and change to hbase on remote machine, you can use {/usr/bin/ssh %1$s %2$s%3$s%4$s "su hbase - -c \"%5$s\""}. That way, to kill RS (for example) integration tests may run {/usr/bin/ssh some-hostname "su hbase - -c \"ps aux | ... | kill ...\""}.
+The command is logged in the test logs, so you can verify it is correct for your environment.
+
+
+
+Running integration tests against mini clusterHBase 0.92 added a verify maven target.
Invoking it, for example by doing mvn verify, will
run all the phases up to and including the verify phase via the
maven failsafe plugin,
running all the above mentioned HBase unit tests as well as tests that are in the HBase integration test group.
-If you just want to run the integration tests, you need to run two commands. First:
+After you have completed
+ mvn install -DskipTests
+you can run just the integration tests by invoking:
+
+cd hbase-it
+mvn verify
+
+If you just want to run the integration tests in top-level, you need to run two commands. First:
mvn failsafe:integration-test
This actually runs ALL the integration tests.
This command will always output BUILD SUCCESS even if there are test failures.
@@ -379,75 +690,170 @@ This actually runs ALL the integration tests.
Running a subset of Integration tests
- This is very similar to how you specify running a subset of unit tests (see above).
+ This is very similar to how you specify running a subset of unit tests (see above), but use the property
+ it.test instead of test.
To just run IntegrationTestClassXYZ.java, use:
- mvn failsafe:integration-test -Dtest=IntegrationTestClassXYZ
- Pretty similar, right?
+ mvn failsafe:integration-test -Dit.test=IntegrationTestClassXYZ
The next thing you might want to do is run groups of integration tests, say all integration tests that are named IntegrationTestClassX*.java:
- mvn failsafe:integration-test -Dtest=*ClassX*
+ mvn failsafe:integration-test -Dit.test=*ClassX*
This runs everything that is an integration test that matches *ClassX*. This means anything matching: "**/IntegrationTest*ClassX*".
You can also run multiple groups of integration tests using comma-delimited lists (similar to unit tests). Using a list of matches still supports full regex matching for each of the groups.This would look something like:
- mvn failsafe:integration-test -Dtest=*ClassX*, *ClassY
+ mvn failsafe:integration-test -Dit.test=*ClassX*, *ClassY
-
+
+
+Running integration tests against distributed cluster
+
+If you have an already-setup HBase cluster, you can launch the integration tests by invoking the class IntegrationTestsDriver. You may have to
+run test-compile first. The configuration will be picked by the bin/hbase script.
+mvn test-compile
+Then launch the tests with:
+bin/hbase [--config config_dir] org.apache.hadoop.hbase.IntegrationTestsDriver [-test=class_regex]
+
+This execution will launch the tests under hbase-it/src/test, having @Category(IntegrationTests.class) annotation,
+and a name starting with IntegrationTests. If specified, class_regex will be used to filter test classes. The regex is checked against full class name; so, part of class name can be used.
+IntegrationTestsDriver uses Junit to run the tests. Currently there is no support for running integration tests against a distributed cluster using maven (see HBASE-6201).
+
+
+
+The tests interact with the distributed cluster by using the methods in the DistributedHBaseCluster (implementing HBaseCluster) class, which in turn uses a pluggable ClusterManager. Concrete implementations provide actual functionality for carrying out deployment-specific and environment-dependent tasks (SSH, etc). The default ClusterManager is HBaseClusterManager, which uses SSH to remotely execute start/stop/kill/signal commands, and assumes some posix commands (ps, etc). Also assumes the user running the test has enough "power" to start/stop servers on the remote machines. By default, it picks up HBASE_SSH_OPTS, HBASE_HOME, HBASE_CONF_DIR from the env, and uses bin/hbase-daemon.sh to carry out the actions. Currently tarball deployments, deployments which uses hbase-daemons.sh, and Apache Ambari deployments are supported. /etc/init.d/ scripts are not supported for now, but it can be easily added. For other deployment options, a ClusterManager can be implemented and plugged in.
+
+
+
+
+Destructive integration / system tests
+
+ In 0.96, a tool named ChaosMonkey has been introduced. It is modeled after the same-named tool by Netflix.
+Some of the tests use ChaosMonkey to simulate faults in the running cluster in the way of killing random servers,
+disconnecting servers, etc. ChaosMonkey can also be used as a stand-alone tool to run a (misbehaving) policy while you
+are running other tests.
+
+
+
+ChaosMonkey defines Action's and Policy's. Actions are sequences of events. We have at least the following actions:
+
+Restart active master (sleep 5 sec)
+Restart random regionserver (sleep 5 sec)
+Restart random regionserver (sleep 60 sec)
+Restart META regionserver (sleep 5 sec)
+Restart ROOT regionserver (sleep 5 sec)
+Batch restart of 50% of regionservers (sleep 5 sec)
+Rolling restart of 100% of regionservers (sleep 5 sec)
+
+
+Policies on the other hand are responsible for executing the actions based on a strategy.
+The default policy is to execute a random action every minute based on predefined action
+weights. ChaosMonkey executes predefined named policies until it is stopped. More than one
+policy can be active at any time.
+
+
+
+ To run ChaosMonkey as a standalone tool deploy your HBase cluster as usual. ChaosMonkey uses the configuration
+from the bin/hbase script, thus no extra configuration needs to be done. You can invoke the ChaosMonkey by running:
+bin/hbase org.apache.hadoop.hbase.util.ChaosMonkey
+
+This will output something like:
+
+12/11/19 23:21:57 INFO util.ChaosMonkey: Using ChaosMonkey Policy: class org.apache.hadoop.hbase.util.ChaosMonkey$PeriodicRandomActionPolicy, period:60000
+12/11/19 23:21:57 INFO util.ChaosMonkey: Sleeping for 26953 to add jitter
+12/11/19 23:22:24 INFO util.ChaosMonkey: Performing action: Restart active master
+12/11/19 23:22:24 INFO util.ChaosMonkey: Killing master:master.example.com,60000,1353367210440
+12/11/19 23:22:24 INFO hbase.HBaseCluster: Aborting Master: master.example.com,60000,1353367210440
+12/11/19 23:22:24 INFO hbase.ClusterManager: Executing remote command: ps aux | grep master | grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s SIGKILL , hostname:master.example.com
+12/11/19 23:22:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
+12/11/19 23:22:25 INFO hbase.HBaseCluster: Waiting service:master to stop: master.example.com,60000,1353367210440
+12/11/19 23:22:25 INFO hbase.ClusterManager: Executing remote command: ps aux | grep master | grep -v grep | tr -s ' ' | cut -d ' ' -f2 , hostname:master.example.com
+12/11/19 23:22:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
+12/11/19 23:22:25 INFO util.ChaosMonkey: Killed master server:master.example.com,60000,1353367210440
+12/11/19 23:22:25 INFO util.ChaosMonkey: Sleeping for:5000
+12/11/19 23:22:30 INFO util.ChaosMonkey: Starting master:master.example.com
+12/11/19 23:22:30 INFO hbase.HBaseCluster: Starting Master on: master.example.com
+12/11/19 23:22:30 INFO hbase.ClusterManager: Executing remote command: /homes/enis/code/hbase-0.94/bin/../bin/hbase-daemon.sh --config /homes/enis/code/hbase-0.94/bin/../conf start master , hostname:master.example.com
+12/11/19 23:22:31 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:starting master, logging to /homes/enis/code/hbase-0.94/bin/../logs/hbase-enis-master-master.example.com.out
+....
+12/11/19 23:22:33 INFO util.ChaosMonkey: Started master: master.example.com,60000,1353367210440
+12/11/19 23:22:33 INFO util.ChaosMonkey: Sleeping for:51321
+12/11/19 23:23:24 INFO util.ChaosMonkey: Performing action: Restart random region server
+12/11/19 23:23:24 INFO util.ChaosMonkey: Killing region server:rs3.example.com,60020,1353367027826
+12/11/19 23:23:24 INFO hbase.HBaseCluster: Aborting RS: rs3.example.com,60020,1353367027826
+12/11/19 23:23:24 INFO hbase.ClusterManager: Executing remote command: ps aux | grep regionserver | grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s SIGKILL , hostname:rs3.example.com
+12/11/19 23:23:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
+12/11/19 23:23:25 INFO hbase.HBaseCluster: Waiting service:regionserver to stop: rs3.example.com,60020,1353367027826
+12/11/19 23:23:25 INFO hbase.ClusterManager: Executing remote command: ps aux | grep regionserver | grep -v grep | tr -s ' ' | cut -d ' ' -f2 , hostname:rs3.example.com
+12/11/19 23:23:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
+12/11/19 23:23:25 INFO util.ChaosMonkey: Killed region server:rs3.example.com,60020,1353367027826. Reported num of rs:6
+12/11/19 23:23:25 INFO util.ChaosMonkey: Sleeping for:60000
+12/11/19 23:24:25 INFO util.ChaosMonkey: Starting region server:rs3.example.com
+12/11/19 23:24:25 INFO hbase.HBaseCluster: Starting RS on: rs3.example.com
+12/11/19 23:24:25 INFO hbase.ClusterManager: Executing remote command: /homes/enis/code/hbase-0.94/bin/../bin/hbase-daemon.sh --config /homes/enis/code/hbase-0.94/bin/../conf start regionserver , hostname:rs3.example.com
+12/11/19 23:24:26 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:starting regionserver, logging to /homes/enis/code/hbase-0.94/bin/../logs/hbase-enis-regionserver-rs3.example.com.out
+
+12/11/19 23:24:27 INFO util.ChaosMonkey: Started region server:rs3.example.com,60020,1353367027826. Reported num of rs:6
+
+
+As you can see from the log, ChaosMonkey started the default PeriodicRandomActionPolicy, which is configured with all the available actions, and ran the RestartActiveMaster and RestartRandomRs actions. The ChaosMonkey tool, if run from the command line, will keep running until the process is killed.
+
+
+
-
-
+
+ Maven Build Commands
+ All commands are executed from the local HBase project directory.
Note: use Maven 3 (Maven 2 may work but we suggest you use Maven 3).
-
+ Compile
mvn compile
-
+
-
+ Running all or individual Unit Tests
+ See the section
above in
-
-
-
- Running all or individual Integration Tests
- See
-
-
+
-
- To build against hadoop 0.22.x or 0.23.x
-
-mvn -Dhadoop.profile=22 ...
-
-That is, designate build with hadoop.profile 22. Pass 23 for hadoop.profile to build against hadoop 0.23.
-Tests do not all pass as of this writing so you may need ot pass -DskipTests unless you are inclined
-to fix the failing tests.
-
+
+ Building against various hadoop versions.
+ As of 0.96, Apache HBase supports building against Apache Hadoop versions: 1.0.3, 2.0.0-alpha and 3.0.0-SNAPSHOT.
+ By default, we will build with Hadoop-1.0.3. To change the version to run with Hadoop-2.0.0-alpha, you would run:
+ mvn -Dhadoop.profile=2.0 ...
+
+ That is, designate build with hadoop.profile 2.0. Pass 2.0 for hadoop.profile to build against hadoop 2.0.
+ Tests may not all pass as of this writing so you may need to pass -DskipTests unless you are inclined
+ to fix the failing tests.
+
+ Similarly, for 3.0, you would just replace the profile value. Note that Hadoop-3.0.0-SNAPSHOT does not currently have a deployed maven artifact - you will need to build and install your own in your local maven repository if you want to run against this profile.
+
+
+ In earlier versions of Apache HBase, you could build against older versions of Apache Hadoop, notably Hadoop 0.22.x and 0.23.x.
+ If you are running, for example, HBase 0.94 and want to build against Hadoop 0.23.x, you would run with:
+ mvn -Dhadoop.profile=22 ...
-
-
+
+ Getting Involved
- HBase gets better only when people contribute!
+ Apache HBase gets better only when people contribute!
- As HBase is an Apache Software Foundation project, see for more information about how the ASF functions.
+ As Apache HBase is an Apache Software Foundation project, see for more information about how the ASF functions.
Mailing Lists
- Sign up for the dev-list and the user-list. See the
+ Sign up for the dev-list and the user-list. See the
mailing lists page.
- Posing questions - and helping to answer other people's questions - is encouraged!
- There are varying levels of experience on both lists so patience and politeness are encouraged (and please
- stay on topic.)
+ Posing questions - and helping to answer other people's questions - is encouraged!
+ There are varying levels of experience on both lists so patience and politeness are encouraged (and please
+ stay on topic.)
Jira
- Check for existing issues in Jira.
+ Check for existing issues in Jira.
If it's either a new feature request, enhancement, or a bug, file a ticket.
Jira Priorities
@@ -457,10 +863,10 @@ to fix the failing tests.
Critical: The issue described can cause data loss or cluster instability in some cases.
Major: Important but not tragic issues, like updates to the client API that will add a lot of much-needed functionality or significant
bugs that need to be fixed but that don't cause data loss.
- Minor: Useful enhancements and annoying but not damaging bugs.
- Trivial: Useful enhancements but generally cosmetic.
-
-
+ Minor: Useful enhancements and annoying but not damaging bugs.
+ Trivial: Useful enhancements but generally cosmetic.
+
+ Code Blocks in Jira Comments
@@ -475,15 +881,15 @@ to fix the failing tests.
-
+
Developing
Codelines
Most development is done on TRUNK. However, there are branches for minor releases (e.g., 0.90.1, 0.90.2, and 0.90.3 are on the 0.90 branch). If you have any questions on this just send an email to the dev dist-list.
-
-
+
+ Unit Tests
+ In HBase we use JUnit 4.
If you need to run miniclusters of HDFS, ZooKeeper, HBase, or MapReduce testing,
@@ -506,30 +912,82 @@ to fix the failing tests.
+
+
Code Standards
See and .
-
-
+ Also, please pay attention to the interface stability/audience classifications that you
+ will see all over our code base. They look like this at the head of the class:
+ @InterfaceAudience.Public
+@InterfaceStability.Stable
+
+ If the InterfaceAudience is Private,
+ we can change the class (and we do not need to include an InterfaceStability mark).
+ If a class is marked Public but its InterfaceStability
+ is marked Unstable, we can change it. If it's
+ marked Public/Evolving, we're allowed to change it
+ but should try not to. If it's Public and Stable
+ we can't change it without a deprecation path or with a really GREAT reason.
+ When you add new classes, mark them with the annotations above if they are publicly accessible.
+ If you are not clear on how to mark your additions, ask on the dev list.
+
+ This convention comes from our parent project Hadoop.
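+
+ For example, a new publicly consumable class might be annotated as follows. This is a minimal
+ sketch: the class name is hypothetical, and the imports shown are the Hadoop classification
+ annotations the convention is borrowed from (the exact package can differ between versions, so
+ copy the import from a neighboring annotated class).
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+@InterfaceAudience.Public    // downstream users may rely on this class
+@InterfaceStability.Evolving // but the API may still change between releases
+public class ExampleNewFacade {
+  // ... public methods forming the advertised API ...
+}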
+
+
+
+ Invariants
+ We don't have many but what we have we list below. All are subject to challenge of
+ course but until then, please hold to the rules of the road.
+
+
+ No permanent state in ZooKeeper
+ ZooKeeper state should be transient (treat it like memory). If deleted, HBase
+ should be able to recover and essentially be in the same state. (There are currently
+ a few exceptions that we need to fix around whether a table is enabled or disabled.)
+
+
+
+
+
+
+ Running In-Situ
+ If you are developing Apache HBase, frequently it is useful to test your changes against a more-real cluster than what you find in unit tests. In this case, HBase can be run directly from the source in local-mode.
+ All you need to do is run:
+
+ ${HBASE_HOME}/bin/start-hbase.sh
+
+ This will spin up a full local-cluster, just as if you had packaged up HBase and installed it on your machine.
+
+ Keep in mind that you will need to have installed HBase into your local maven repository for the in-situ cluster to work properly. That is, you will need to run:
+ mvn clean install -DskipTests
+ to ensure that maven can find the correct classpath and dependencies. Generally, the above command
+ is just a good thing to try running first, if maven is acting oddly.
+ Submitting Patches
+ If you are new to submitting patches to open source or new to submitting patches to Apache,
+ I'd suggest you start by reading the On Contributing Patches
+ page from the Apache Commons Project. It's a nice overview that
+ applies equally to the Apache HBase Project.
+ Create Patch
- Patch files can be easily generated from Eclipse, for example by selecting "Team -> Create Patch".
+ See the aforementioned Apache Commons link for how to make patches against a checked out subversion
+ repository. Patch files can also be easily generated from Eclipse, for example by selecting "Team -> Create Patch".
Patches can also be created by git diff and svn diff.
- Please submit one patch-file per Jira. For example, if multiple files are changed make sure the
+ Please submit one patch-file per Jira. For example, if multiple files are changed make sure the
selected resource when generating the patch is a directory. Patch files can reflect changes in multiple files. Make sure you review for code style.
Patch File Naming
- The patch file should have the HBase Jira ticket in the name. For example, if a patch was submitted for Foo.java, then
- a patch file called Foo_HBASE_XXXX.patch would be acceptable where XXXX is the HBase Jira number.
+ The patch file should have the Apache HBase Jira ticket in the name. For example, if a patch was submitted for Foo.java, then
+ a patch file called Foo_HBASE_XXXX.patch would be acceptable where XXXX is the Apache HBase Jira number.
If you are generating from a branch, then including the target branch in the filename is advised, e.g., HBASE-XXXX-0.90.patch.
@@ -539,26 +997,30 @@ to fix the failing tests.
Yes, please. Please try to include unit tests with every code patch (and especially new classes and large changes).
Make sure unit tests pass locally before submitting the patch.Also, see .
+ If you are creating a new unit test class, notice how other unit test classes have classification/sizing
+ annotations at the top and a static method on the end. Be sure to include these in any new unit test files
+ you generate. See for more on how the annotations work.
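+
+ A minimal sketch of what such a test class can look like. The class and assertion are
+ hypothetical; the category annotation is the small-test classification used elsewhere in the
+ code base, and any trailing per-class boilerplate should be copied from a neighboring test
+ class in the same module.
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.SmallTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(SmallTests.class) // classification/sizing annotation at the top of the class
+public class TestExampleThing {
+  @Test
+  public void testSomethingSmallAndFast() {
+    assertEquals(4, 2 + 2); // a small test: no minicluster, runs in well under a minute
+  }
+}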
+ Attach Patch to Jira
+ The patch should be attached to the associated Jira ticket "More Actions -> Attach Files". Make sure you click the
ASF license inclusion, otherwise the patch can't be considered for inclusion.
- Once attached to the ticket, click "Submit Patch" and
+ Once attached to the ticket, click "Submit Patch" and
the status of the ticket will change. Committers will review submitted patches for inclusion into the codebase. Please
understand that not every patch may get committed, and that feedback will likely be provided on the patch. Fear not, though,
- because the HBase community is helpful!
+ because the Apache HBase community is helpful!
-
+
Common Patch Feedback
The following items are representative of common patch feedback. Your patch process will go faster if these are
taken into account before submission.
- See the Java coding standards
+ See the Java coding standards
for more information on coding conventions in Java.
@@ -567,7 +1029,7 @@ to fix the failing tests.
if ( foo.equals( bar ) ) { // don't do this
- ... do this instead...
+ ... do this instead...
if (foo.equals(bar)) {
@@ -576,9 +1038,9 @@ if (foo.equals(bar)) {
foo = barArray[ i ]; // don't do this
- ... do this instead...
+ ... do this instead...
-foo = barArray[i];
+foo = barArray[i];
@@ -589,7 +1051,7 @@ foo = barArray[i];
public void readFields(DataInput arg0) throws IOException { // don't do this
foo = arg0.readUTF(); // don't do this
- ... do this instead ...
+ ... do this instead ...
public void readFields(DataInput di) throws IOException {
foo = di.readUTF();
@@ -600,19 +1062,14 @@ foo = barArray[i];
Long Lines
- Keep lines less than 80 characters.
+ Keep lines less than 100 characters.
-Bar bar = foo.veryLongMethodWithManyArguments(argument1, argument2, argument3, argument4, argument5); // don't do this
+Bar bar = foo.veryLongMethodWithManyArguments(argument1, argument2, argument3, argument4, argument5, argument6, argument7, argument8, argument9); // don't do this
- ... do this instead ...
-
-Bar bar = foo.veryLongMethodWithManyArguments(argument1,
- argument2, argument3,argument4, argument5);
-
- ... or this, whichever looks better ...
+ ... do something like this instead ...
Bar bar = foo.veryLongMethodWithManyArguments(
- argument1, argument2, argument3,argument4, argument5);
+ argument1, argument2, argument3,argument4, argument5, argument6, argument7, argument8, argument9);
@@ -624,11 +1081,17 @@ Bar bar = foo.veryLongMethodWithManyArguments(
Bar bar = foo.getBar(); <--- imagine there's an extra space(s) after the semicolon instead of a line break.
Make sure there's a line-break after the end of your code, and also avoid lines that have nothing
- but whitespace.
+ but whitespace.
-
+ Implementing Writable
+
+ Applies pre-0.96 only
+ In 0.96, HBase moved to protobufs. The below section on Writables
+ applies to 0.94.x and previous, not to 0.96 and beyond.
+
+ Every class returned by RegionServers must implement Writable. If you
are creating a new class that needs to implement this interface, don't forget the default constructor.
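+
+ A minimal sketch of such a class (the class and its fields are hypothetical). Note the
+ no-argument constructor: the RPC machinery instantiates the class reflectively and then calls
+ readFields, so without it deserialization fails.
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+
+public class ExampleResult implements Writable {
+  private long count;
+  private String label;
+
+  public ExampleResult() {
+    // required default constructor
+  }
+
+  public ExampleResult(long count, String label) {
+    this.count = count;
+    this.label = label;
+  }
+
+  public void write(DataOutput out) throws IOException {
+    out.writeLong(count);
+    out.writeUTF(label);
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    count = in.readLong();
+    label = in.readUTF();
+  }
+}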
@@ -636,39 +1099,60 @@ Bar bar = foo.getBar(); <--- imagine there's an extra space(s) after the
Javadoc
This is also a very common feedback item. Don't forget Javadoc!
+ Javadoc warnings are checked during precommit. If the precommit tool gives you a '-1',
+ please fix the javadoc issue. Your patch won't be committed if it adds such warnings.
+
+
+ Findbugs
+
+ Findbugs is used to detect common bug patterns. Like Javadoc, it is checked during
+ the precommit build on Apache's Jenkins, and as with Javadoc, please fix any issues it reports.
+ You can run findbugs locally with 'mvn findbugs:findbugs': it will generate the
+ findbugs files locally. Sometimes, you may have to write code that is smarter than
+ Findbugs can analyze. You can annotate your code to tell Findbugs you know what you're
+ doing, by annotating your class with:
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(
+ value="HE_EQUALS_USE_HASHCODE",
+ justification="I know what I'm doing")
+
+
+ Note that we're using the Apache-licensed version of the annotations.
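+
+ As a minimal sketch, the annotation applied at class level looks like this (the class is
+ hypothetical; HE_EQUALS_USE_HASHCODE is the pattern Findbugs reports for an equals without a
+ matching hashCode):
+@edu.umd.cs.findbugs.annotations.SuppressWarnings(
+    value="HE_EQUALS_USE_HASHCODE",
+    justification="I know what I'm doing")
+public class ExampleEqualsOnly {
+  @Override
+  public boolean equals(Object other) {
+    return other instanceof ExampleEqualsOnly; // deliberately no hashCode(): suppressed above
+  }
+}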
+
+
+
Javadoc - Useless Defaults
Don't just leave the @param arguments the way your IDE generated them. Don't do this...
/**
- *
+ *
* @param bar <---- don't do this!!!!
* @return <---- or this!!!!
*/
public Foo getFoo(Bar bar);
-
- ... either add something descriptive to the @param and @return lines, or just remove them.
- But the preference is to add something descriptive and useful.
+
+ ... either add something descriptive to the @param and @return lines, or just remove them.
+ But the preference is to add something descriptive and useful.
One Thing At A Time, Folks
If you submit a patch for one thing, don't do auto-reformatting or unrelated reformatting of code on a completely
- different area of code.
+ different area of code.
- Likewise, don't add unrelated cleanup or refactorings outside the scope of your Jira.
+ Likewise, don't add unrelated cleanup or refactorings outside the scope of your Jira.
Ambiguous Unit Tests
- Make sure that you're clear about what you are testing in your unit tests and why.
+ Make sure that you're clear about what you are testing in your unit tests and why.
-
+
ReviewBoard
Larger patches should go through ReviewBoard.
@@ -676,16 +1160,29 @@ Bar bar = foo.getBar(); <--- imagine there's an extra space(s) after the
For more information on how to use ReviewBoard, see
the ReviewBoard documentation.
-
+
Committing Patches
- Committers do this. See How To Commit in the HBase wiki.
+ Committers do this. See How To Commit in the Apache HBase wiki.
Committers will also resolve the Jira, typically after the patch passes a build.
+
+ Committers are responsible for making sure commits do not break the build or tests
+
+ If a committer commits a patch it is their responsibility
+ to make sure it passes the test suite. It is helpful
+ if contributors keep an eye out that their patch
+ does not break the HBase build and/or tests, but ultimately
+ a contributor cannot be expected to be up on the
+ particular vagaries and interconnections that occur
+ in a project like HBase. A committer should.
+
+
-
+
+
diff --git a/src/docbkx/external_apis.xml b/src/docbkx/external_apis.xml
index 155a964862f3..6380b6e7b801 100644
--- a/src/docbkx/external_apis.xml
+++ b/src/docbkx/external_apis.xml
@@ -26,31 +26,34 @@
* limitations under the License.
*/
-->
- External APIs
- This chapter will cover access to HBase either through non-Java languages, or through custom protocols.
-
+ Apache HBase (TM) External APIs
+ This chapter will cover access to Apache HBase (TM) either through non-Java languages, or through custom protocols.
+
Non-Java Languages Talking to the JVM
- Currently the documentation on this topic in the
- HBase Wiki.
+ Currently the documentation on this topic in the
+ Apache HBase Wiki.
+ See also the Thrift API Javadoc.
REST
- Currently most of the documentation on REST exists in the
- HBase Wiki on REST.
+ Currently most of the documentation on REST exists in the
+ Apache HBase Wiki on REST (The REST gateway used to be
+ called 'Stargate'). There are also a nice set of blogs on How-to: Use the Apache HBase REST Interface
+ by Jesse Anderson.
Thrift
- Currently most of the documentation on Thrift exists in the
- HBase Wiki on Thrift.
+ Currently most of the documentation on Thrift exists in the
+ Apache HBase Wiki on Thrift.
Filter Language
Use Case
- Note: this feature was introduced in HBase 0.92
+ Note: this feature was introduced in Apache HBase 0.92.
+ This allows the user to perform server-side filtering when accessing HBase over Thrift. The user specifies a filter via a string. The string is parsed on the server to construct the filter
@@ -407,10 +410,15 @@
-
+
-
+
+
+ C/C++ Apache HBase Client
+ FB's Chip Turner wrote a pure C/C++ client. Check it out.
+
+
diff --git a/src/docbkx/getting_started.xml b/src/docbkx/getting_started.xml
index 3aa392b810bf..e1c4344ef074 100644
--- a/src/docbkx/getting_started.xml
+++ b/src/docbkx/getting_started.xml
@@ -32,9 +32,8 @@
Introduction will get you up and
- running on a single-node instance of HBase using the local filesystem.
- describes setup
- of HBase in distributed mode running on top of HDFS.
+ running on a single-node instance of HBase using the local filesystem.
+
@@ -45,17 +44,31 @@
rows via the HBase shell, and then cleaning
up and shutting down your standalone HBase instance. The below exercise
should take no more than ten minutes (not including download time).
+ Before we proceed, make sure you are good on the below loopback prerequisite.
+
+ Loopback IP
+ HBase expects the loopback IP address to be 127.0.0.1. Ubuntu and some other distributions,
+ for example, will default to 127.0.1.1 and this will cause problems for you.
+
+ /etc/hosts should look something like this:
+
+ 127.0.0.1 localhost
+ 127.0.0.1 ubuntu.ubuntu-domain ubuntu
+
+
+
+
Download and unpack the latest stable release.
Choose a download site from this list of Apache Download
- Mirrors. Click on suggested top link. This will take you to a
+ Mirrors. Click on the suggested top link. This will take you to a
mirror of HBase Releases. Click on the folder named
stable and then download the file that ends in
.tar.gz to your local filesystem; e.g.
- hbase-.tar.gz.
+ hbase-0.94.2.tar.gz.
Decompress and untar your download and then change into the
unpacked directory.
@@ -65,24 +78,27 @@ $ cd hbase-
At this point, you are ready to start HBase. But before starting
- it, you might want to edit conf/hbase-site.xml and
- set the directory you want HBase to write to,
- hbase.rootdir.
-
-<?xml version="1.0"?>
+ it, edit conf/hbase-site.xml, the file you write
+ your site-specific configurations into. Set
+ hbase.rootdir, the directory HBase writes data to,
+ and hbase.zookeeper.property.dataDir, the directory
+ ZooKeeper writes its data to:
+<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hbase.rootdir</name>
<value>file:///DIRECTORY/hbase</value>
</property>
-</configuration>
-
- Replace DIRECTORY in the above with a
- path to a directory where you want HBase to store its data. By default,
- hbase.rootdir is set to
- /tmp/hbase-${user.name} which means you'll lose all
- your data whenever your server reboots (Most operating systems clear
+ <property>
+ <name>hbase.zookeeper.property.dataDir</name>
+ <value>/DIRECTORY/zookeeper</value>
+ </property>
+</configuration> Replace DIRECTORY in the above with the
+ path to the directory you would have HBase and ZooKeeper write their data. By default,
+ hbase.rootdir is set to /tmp/hbase-${user.name}
+ and similarly so for the default ZooKeeper data location which means you'll lose all
+ your data whenever your server reboots unless you change it (Most operating systems clear
/tmp on restart).
@@ -96,19 +112,19 @@ starting Master, logging to logs/hbase-user-master-example.org.outlogs subdirectory. Check them out especially if
- HBase had trouble starting.
+ it seems HBase had trouble starting.
Is java installed?All of the above presumes a 1.6 version of Oracle
java is installed on your machine and
- available on your path; i.e. when you type
+ available on your path (See ); i.e. when you type
java, you see output that describes the
options the java program takes (HBase requires java 6). If this is not
the case, HBase will not start. Install java, edit
conf/hbase-env.sh, uncommenting the
- JAVA_HOME line pointing it to your java install. Then,
+ JAVA_HOME line pointing it to your java install, then,
retry the steps above.
@@ -154,9 +170,7 @@ hbase(main):006:0> put 'test', 'row3', 'cf:c', 'value3'
cf in this example -- followed by a colon and then a
column qualifier suffix (a in this case).
- Verify the data insert.
-
- Run a scan of the table by doing the following
+ Verify the data insert by running a scan of the table as follows
+hbase(main):007:0> scan 'test'
ROW COLUMN+CELL
@@ -165,7 +179,7 @@ row2 column=cf:b, timestamp=1288380738440, value=value2
row3 column=cf:c, timestamp=1288380747365, value=value3
3 row(s) in 0.0590 seconds
- Get a single row as follows
+ Get a single row
+hbase(main):008:0> get 'test', 'row1'
COLUMN CELL
@@ -198,9 +212,9 @@ stopping hbase...............Where to go nextThe above described standalone setup is good for testing and
- experiments only. Next move on to where we'll go into
- depth on the different HBase run modes, requirements and critical
- configurations needed setting up a distributed HBase deploy.
+ experiments only. In the next chapter, ,
+ we'll go into depth on the different HBase run modes, the system requirements
+ for running HBase, and the critical configurations for setting up a distributed HBase deploy.
diff --git a/src/docbkx/ops_mgt.xml b/src/docbkx/ops_mgt.xml
index 3dbd718a89c2..0009ab42bc09 100644
--- a/src/docbkx/ops_mgt.xml
+++ b/src/docbkx/ops_mgt.xml
@@ -26,16 +26,35 @@
* limitations under the License.
*/
-->
- HBase Operational Management
- This chapter will cover operational tools and practices required of a running HBase cluster.
+ Apache HBase (TM) Operational Management
+ This chapter will cover operational tools and practices required of a running Apache HBase cluster.
The subject of operations is related to the topics of , ,
- and but is a distinct topic in itself.
-
+ and but is a distinct topic in itself.
+
HBase Tools and Utilities
Here we list HBase tools for administration, analysis, fixup, and
debugging.
+ Driver
+ There is a Driver class, executed by the HBase jar, that can be used to invoke frequently accessed utilities. For example,
+HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-VERSION.jar
+
+... will return...
+
+An example program must be given as the first argument.
+Valid program names are:
+ completebulkload: Complete a bulk data load.
+ copytable: Export a table from local cluster to peer cluster
+ export: Write table data to HDFS.
+ import: Import data written by Export.
+ importtsv: Import data in TSV format.
+ rowcounter: Count rows in HBase table
+ verifyrep: Compare the data from tables in two different clusters. WARNING: It doesn't work for incrementColumnValues'd cells since the timestamp is chan
+
+... for allowable program names.
+
+ HBase hbck
+ An fsck for your HBase install
@@ -50,6 +69,8 @@
Passing -fix may correct the inconsistency (This latter
is an experimental feature).
+ For more information, see .
+ HFile ToolSee .
@@ -72,23 +93,28 @@
Similarly you can force a split of a log file directory by
doing: $ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.HLog --split hdfs://example.org:8020/hbase/.logs/example.org,60020,1283516293161/
+
+
+ HLogPrettyPrinter
+ HLogPrettyPrinter is a tool with configurable options to print the contents of an HLog.
+
+
+
Compression Tool
- See .
+ See .
+ CopyTable
CopyTable is a utility that can copy part of or all of a table, either to the same cluster or to another cluster. The usage is as follows:
-$ bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable [--rs.class=CLASS] [--rs.impl=IMPL] [--starttime=X] [--endtime=Y] [--new.name=NEW] [--peer.adr=ADR] tablename
+$ bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable [--starttime=X] [--endtime=Y] [--new.name=NEW] [--peer.adr=ADR] tablename
Options:
- rs.class hbase.regionserver.class of the peer cluster. Specify if different from current cluster.
- rs.impl hbase.regionserver.impl of the peer cluster. starttime Beginning of the time range. Without endtime means starttime to forever.endtime End of the time range. Without endtime means starttime to forever.versions Number of cell versions to copy.
@@ -104,12 +130,15 @@
Example of copying 'TestTable' to a cluster that uses replication for a 1 hour window:
$ bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable
---rs.class=org.apache.hadoop.hbase.ipc.ReplicationRegionInterface
---rs.impl=org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer
--starttime=1265875194289 --endtime=1265878794289
--peer.adr=server1,server2,server3:2181:/hbase TestTable
- Note: caching for the input Scan is configured via hbase.client.scanner.caching in the job configuration.
+ Scanner Caching
+ Caching for the input Scan is configured via hbase.client.scanner.caching in the job configuration.
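+
+ For a job you drive yourself, a minimal sketch of setting this property programmatically
+ before the job is submitted (the class name and the value 500 are illustrative):
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+
+public class ScannerCachingExample {
+  public static void main(String[] args) {
+    Configuration conf = HBaseConfiguration.create();
+    // Rows fetched per scanner RPC by the job's input Scan; tune to your row size.
+    conf.setInt("hbase.client.scanner.caching", 500);
+    // ... hand 'conf' to the MapReduce Job that drives CopyTable, RowCounter, etc. ...
+  }
+}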
+
+
+
+ See Jonathan Hsieh's Online HBase Backups with CopyTable blog post for more on CopyTable.
@@ -128,17 +157,156 @@
+
+ ImportTsv
+ ImportTsv is a utility that will load data in TSV format into HBase. It has two distinct usages: loading data from TSV format in HDFS
+ into HBase via Puts, and preparing StoreFiles to be loaded via the completebulkload.
+
+ To load data via Puts (i.e., non-bulk loading):
+$ bin/hbase org.apache.hadoop.hbase.mapreduce.ImportTsv -Dimporttsv.columns=a,b,c <tablename> <hdfs-inputdir>
+
+
+ To generate StoreFiles for bulk-loading:
+$ bin/hbase org.apache.hadoop.hbase.mapreduce.ImportTsv -Dimporttsv.columns=a,b,c -Dimporttsv.bulk.output=hdfs://storefile-outputdir <tablename> <hdfs-data-inputdir>
+
+
+ These generated StoreFiles can be loaded into HBase via .
+
+ ImportTsv Options
+ Running ImportTsv with no arguments prints brief usage information:
+
+Usage: importtsv -Dimporttsv.columns=a,b,c <tablename> <inputdir>
+
+Imports the given input directory of TSV data into the specified table.
+
+The column names of the TSV data must be specified using the -Dimporttsv.columns
+option. This option takes the form of comma-separated column names, where each
+column name is either a simple column family, or a columnfamily:qualifier. The special
+column name HBASE_ROW_KEY is used to designate that this column should be used
+as the row key for each imported record. You must specify exactly one column
+to be the row key, and you must specify a column name for every column that exists in the
+input data.
+
+By default importtsv will load data directly into HBase. To instead generate
+HFiles of data to prepare for a bulk data load, pass the option:
+ -Dimporttsv.bulk.output=/path/for/output
+ Note: the target table will be created with default column family descriptors if it does not already exist.
+
+Other options that may be specified with -D include:
+ -Dimporttsv.skip.bad.lines=false - fail if encountering an invalid line
+ '-Dimporttsv.separator=|' - eg separate on pipes instead of tabs
+ -Dimporttsv.timestamp=currentTimeAsLong - use the specified timestamp for the import
+ -Dimporttsv.mapper.class=my.Mapper - A user-defined Mapper to use instead of org.apache.hadoop.hbase.mapreduce.TsvImporterMapper
+
+
+ ImportTsv Example
+ For example, assume that we are loading data into a table called 'datatsv' with a ColumnFamily called 'd' with two columns "c1" and "c2".
+
+ Assume that an input file exists as follows:
+
+row1 c1 c2
+row2 c1 c2
+row3 c1 c2
+row4 c1 c2
+row5 c1 c2
+row6 c1 c2
+row7 c1 c2
+row8 c1 c2
+row9 c1 c2
+row10 c1 c2
+
+
+ For ImportTsv to use this input file, the command line needs to look like this:
+
+ HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-VERSION.jar importtsv -Dimporttsv.columns=HBASE_ROW_KEY,d:c1,d:c2 -Dimporttsv.bulk.output=hdfs://storefileoutput datatsv hdfs://inputfile
+
+ ... and in this example the first column is the rowkey, which is why the HBASE_ROW_KEY is used. The second and third columns in the file will be imported as "d:c1" and "d:c2", respectively.
+
+
+ ImportTsv Warning
+ If you are preparing a lot of data for bulk loading, make sure the target HBase table is pre-split appropriately.
+
+
+ See Also
+ For more information about bulk-loading HFiles into HBase, see
+
+
+
+
+ CompleteBulkLoad
+ The completebulkload utility will move generated StoreFiles into an HBase table. This utility is often used
+ in conjunction with output from .
+
+ There are two ways to invoke this utility, with explicit classname and via the driver:
+$ bin/hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles <hdfs://storefileoutput> <tablename>
+
+... and via the Driver:
+HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-VERSION.jar completebulkload <hdfs://storefileoutput> <tablename>
+
+
+ CompleteBulkLoad Warning
+ Data generated via MapReduce is often created with file permissions that are not compatible with the running HBase process. Assuming you're running HDFS with permissions enabled, those permissions will need to be updated before you run CompleteBulkLoad.
+
+
+ For more information about bulk-loading HFiles into HBase, see .
+
+
+
+ WALPlayer
+ WALPlayer is a utility to replay WAL files into HBase.
+
+ The WAL can be replayed for a set of tables or all tables, and a
+ timerange can be provided (in milliseconds). The WAL is filtered to
+ this set of tables. The output can optionally be mapped to another set of tables.
+
+ WALPlayer can also generate HFiles for later bulk importing, in that case
+ only a single table and no mapping can be specified.
+
+ Invoke via:
+$ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer [options] <wal inputdir> <tables> [<tableMappings>]
+
+
+ For example:
+$ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer /backuplogdir oldTable1,oldTable2 newTable1,newTable2
+
+
+
+ WALPlayer, by default, runs as a mapreduce job. To NOT run WALPlayer as a mapreduce job on your cluster,
+ force it to run all in the local process by adding the flag -Dmapred.job.tracker=local on the command line.
+
+
- RowCounter
- RowCounter is a utility that will count all the rows of a table. This is a good utility to use
- as a sanity check to ensure that HBase can read all the blocks of a table if there are any concerns of metadata inconsistency.
+ RowCounter and CellCounter
+ RowCounter is a
+ mapreduce job to count all the rows of a table. This is a good utility to use as a sanity check to ensure that HBase can read
+ all the blocks of a table if there are any concerns of metadata inconsistency. It will run the mapreduce all in a single
+ process but it will run faster if you have a MapReduce cluster in place for it to exploit.
$ bin/hbase org.apache.hadoop.hbase.mapreduce.RowCounter <tablename> [<column1> <column2>...]
- Note: caching for the input Scan is configured via hbase.client.scanner.caching in the job configuration.
+ Note: caching for the input Scan is configured via hbase.client.scanner.caching in the job configuration.
+
+ HBase ships another diagnostic mapreduce job called
+ CellCounter. Like
+ RowCounter, it gathers statistics about your table. The statistics gathered by CellCounter are more fine-grained
+ and include:
+
+ Total number of rows in the table.
+ Total number of CFs across all rows.
+ Total qualifiers across all rows.
+ Total occurrence of each CF.
+ Total occurrence of each qualifier.
+ Total number of versions of each qualifier.
+
+
+ The program allows you to limit the scope of the run. Provide a row regex or prefix to limit the rows to analyze. Use
+ hbase.mapreduce.scan.column.family to specify scanning a single column family.
+ $ bin/hbase org.apache.hadoop.hbase.mapreduce.CellCounter <tablename> <outputDir> [regex or prefix]
+ Note: just like RowCounter, caching for the input Scan is configured via hbase.client.scanner.caching in the
+ job configuration.
-
+
@@ -148,7 +316,7 @@
Major compactions can be requested via the HBase shell or HBaseAdmin.majorCompact.
Note: major compactions do NOT do region merges. See for more information about compactions.
-
+
@@ -157,16 +325,16 @@
$ bin/hbase org.apache.hadoop.hbase.util.Merge <tablename> <region1> <region2>
If you feel you have too many regions and want to consolidate them, Merge is the utility you need. Merge must
- run be done when the cluster is down.
+ be run when the cluster is down.
See the O'Reilly HBase Book for
an example of usage.
- Additionally, there is a Ruby script attached to HBASE-1621
+ Additionally, there is a Ruby script attached to HBASE-1621
for region merging.
-
+
Node Management
Node Decommission
You can stop an individual RegionServer by running the following
@@ -189,10 +357,10 @@
A downside to the above stop of a RegionServer is that regions could be offline for
a good period of time. Regions are closed in order. If there are many regions on the server, the
first region to close may not be back online until all regions close and after the master
- notices the RegionServer's znode gone. In HBase 0.90.2, we added facility for having
- a node gradually shed its load and then shutdown itself down. HBase 0.90.2 added the
+ notices the RegionServer's znode gone. In Apache HBase 0.90.2, we added facility for having
+ a node gradually shed its load and then shutdown itself down. Apache HBase 0.90.2 added the
graceful_stop.sh script. Here is its usage:
- $ ./bin/graceful_stop.sh
+ $ ./bin/graceful_stop.sh
Usage: graceful_stop.sh [--config <conf-dir>] [--restart] [--reload] [--thrift] [--rest] <hostname>
thrift If we should stop/start thrift before/after the hbase stop/start
rest If we should stop/start rest before/after the hbase stop/start
@@ -205,7 +373,7 @@ Usage: graceful_stop.sh [--config &conf-dir>] [--restart] [--reload] [--thri
To decommission a loaded RegionServer, run the following:
$ ./bin/graceful_stop.sh HOSTNAME
where HOSTNAME is the host carrying the RegionServer
- you would decommission.
+ you would decommission.
On HOSTNAME
The HOSTNAME passed to graceful_stop.sh
must match the hostname that hbase is using to identify RegionServers.
@@ -227,7 +395,7 @@ Usage: graceful_stop.sh [--config &conf-dir>] [--restart] [--reload] [--thri
and because the RegionServer went down cleanly, there will be no
WAL logs to split.
Load Balancer
-
+
It is assumed that the Region Load Balancer is disabled while the
graceful_stop script runs (otherwise the balancer
and the decommission script will end up fighting over region deployments).
@@ -239,10 +407,31 @@ This turns the balancer OFF. To reenable, do:
hbase(main):001:0> balance_switch true
false
0 row(s) in 0.3590 seconds
-
+
-
+
+ Bad or Failing Disk
+ It is good having set if you have a decent number of disks
+ per machine for the case where a disk plain dies. But usually disks do the "John Wayne" -- i.e. take a while
+ to go down spewing errors in dmesg -- or for some reason, run much slower than their
+ companions. In this case you want to decommission the disk. You have two options. You can
+ decommission the datanode
+ or, less disruptively in that only the bad disk's data will be rereplicated, you can stop the datanode,
+ unmount the bad volume (You can't umount a volume while the datanode is using it), and then restart the
+ datanode (presuming you have set dfs.datanode.failed.volumes.tolerated > 0). The regionserver will
+ throw some errors in its logs as it recalibrates where to get its data from -- it will likely
+ roll its WAL log too -- but in general, aside from some latency spikes, it should keep on chugging.
+
+ If you are doing short-circuit reads, you will have to move the regions off the regionserver
+ before you stop the datanode; with short-circuit reads, even though the files are chmod'd so the regionserver cannot
+ access them, because it already has the files open it will be able to keep reading the file blocks
+ from the bad disk even though the datanode is down. Move the regions back after you restart the
+ datanode.
+
+
+
+ Rolling Restart
@@ -300,7 +489,7 @@ false
- Metrics
+ HBase Metrics
+ Metric Setup
+ See Metrics for
@@ -381,8 +570,37 @@ false
HBase Monitoring
- TODO
-
+
+ Overview
+ The following metrics are arguably the most important to monitor for each RegionServer for
+ "macro monitoring", preferably with a system like OpenTSDB.
+ If your cluster is having performance issues it's likely that you'll see something unusual with
+ this group.
+
+ HBase:
+
+ Requests
+ Compactions queue
+
+
+ OS:
+
+ IO Wait
+ User CPU
+
+
+ Java:
+
+ GC
+
+
+
+
+
+ For more information on HBase metrics, see .
+
+
+
Slow Query Log
The HBase slow query log consists of parseable JSON structures describing the properties of those client operations (Gets, Puts, Deletes, etc.) that either took too long to run, or produced too much output. The thresholds for "too long to run" and "too much output" are configurable, as described below. The output is produced inline in the main region server logs so that it is easy to discover further details from context with other logged events. It is also prepended with identifying tags (responseTooSlow), (responseTooLarge), (operationTooSlow), and (operationTooLarge) in order to enable easy filtering with grep, in case the user desires to see only slow queries.
@@ -429,7 +647,7 @@ false
-
+
Cluster Replication
See Cluster Replication.
@@ -437,8 +655,8 @@ false
HBase Backup
- There are two broad strategies for performing HBase backups: backing up with a full cluster shutdown, and backing up on a live cluster.
- Each approach has pros and cons.
+ There are two broad strategies for performing HBase backups: backing up with a full cluster shutdown, and backing up on a live cluster.
+ Each approach has pros and cons.
For additional information, see HBase Backup Options over on the Sematext Blog.
@@ -452,27 +670,27 @@ false
Distcp
- Distcp could be used to either copy the contents of the HBase directory in HDFS to either the same cluster in another directory, or
+ Distcp could be used to either copy the contents of the HBase directory in HDFS to either the same cluster in another directory, or
to a different cluster.
- Note: Distcp works in this situation because the cluster is down and there are no in-flight edits to files.
+ Note: Distcp works in this situation because the cluster is down and there are no in-flight edits to files.
Distcp-ing of files in the HBase directory is not generally recommended on a live cluster.
Restore (if needed)
- The backup of the hbase directory from HDFS is copied onto the 'real' hbase directory via distcp. The act of copying these files
+ The backup of the hbase directory from HDFS is copied onto the 'real' hbase directory via distcp. The act of copying these files
creates new HDFS metadata, which is why a restore of the NameNode edits from the time of the HBase backup isn't required for this kind of
restore, because it's a restore (via distcp) of a specific HDFS directory (i.e., the HBase part) not the entire HDFS file-system.
Live Cluster Backup - Replication
- This approach assumes that there is a second cluster.
+ This approach assumes that there is a second cluster.
See the HBase page on replication for more information.
Live Cluster Backup - CopyTable
- The utility could either be used to copy data from one table to another on the
+ The utility could either be used to copy data from one table to another on the
same cluster, or to copy data to another table on another cluster.
Since the cluster is up, there is a risk that edits could be missed in the copy process.
@@ -486,6 +704,106 @@ false
+
+
+ HBase Snapshots
+ HBase Snapshots allow you to take a snapshot of a table without too much impact on Region Servers.
+ Snapshot, clone, and restore operations don't involve data copying.
+ Also, exporting the snapshot to another cluster does not impact the Region Servers.
+
+ Prior to version 0.94.6, the only way to back up or to clone a table was to use CopyTable/ExportTable,
+ or to copy all the hfiles in HDFS after disabling the table.
+ The disadvantages of these methods are that you can degrade region server performance
+ (Copy/Export Table) or you need to disable the table, which means no reads or writes;
+ and this is usually unacceptable.
+
+ Configuration
+ To turn on the snapshot support just set the
+ hbase.snapshot.enabled property to true.
+ (Snapshots are enabled by default in 0.95+ and off by default in 0.94.6+)
+
+ <property>
+ <name>hbase.snapshot.enabled</name>
+ <value>true</value>
+ </property>
+
+
+
+ Take a Snapshot
+ You can take a snapshot of a table regardless of whether it is enabled or disabled.
+ The snapshot operation doesn't involve any data copying.
+
+ $ ./bin/hbase shell
+ hbase> snapshot 'myTable', 'myTableSnapshot-122112'
+
+
+
+ Listing Snapshots
+ List all snapshots taken (by printing the names and relative information).
+
+ $ ./bin/hbase shell
+ hbase> list_snapshots
+
+
+
+ Deleting Snapshots
+ You can remove a snapshot, and the files retained for that snapshot will be removed
+ if no longer needed.
+
+ $ ./bin/hbase shell
+ hbase> delete_snapshot 'myTableSnapshot-122112'
+
+
+
+ Clone a table from snapshot
+ From a snapshot you can create a new table (clone operation) with the same data
+ that you had when the snapshot was taken.
+ The clone operation doesn't involve data copies, and a change to the cloned table
+ doesn't impact the snapshot or the original table.
+
+ $ ./bin/hbase shell
+ hbase> clone_snapshot 'myTableSnapshot-122112', 'myNewTestTable'
+
+
+
+ Restore a snapshot
+ The restore operation requires the table to be disabled, and the table will be
+ restored to the state at the time when the snapshot was taken,
+ changing both data and schema if required.
+
+ $ ./bin/hbase shell
+ hbase> disable 'myTable'
+ hbase> restore_snapshot 'myTableSnapshot-122112'
+
+
+
+ Since Replication works at log level and snapshots at file-system level,
+ after a restore, the replicas will be in a different state from the master.
+ If you want to use restore, you need to stop replication and redo the bootstrap.
+
+
+ In case of partial data loss due to a misbehaving client, instead of a full restore
+ that requires the table to be disabled, you can clone the table from the snapshot
+ and use a Map-Reduce job to copy the data that you need, from the clone to the main one.
+
+
+ Snapshots operations and ACLs
+ If you are using security with the AccessController Coprocessor (See ),
+ only a global administrator can take, clone, or restore a snapshot, and these actions do not capture the ACL rights.
+ This means that restoring a table preserves the ACL rights of the existing table,
+ while cloning a table creates a new table that has no ACL rights until the administrator adds them.
+
+ Export to another cluster
+ The ExportSnapshot tool copies all the data related to a snapshot (hfiles, logs, snapshot metadata) to another cluster.
+ The tool executes a Map-Reduce job, similar to distcp, to copy files between the two clusters,
+ and since it works at file-system level the hbase cluster does not have to be online.
+ To copy a snapshot called MySnapshot to an HBase cluster srv2 (hdfs://srv2:8082/hbase) using 16 mappers:
+$ bin/hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase -mappers 16
+
+
+
+
+
Capacity Planning
Storage
A common question for HBase administrators is estimating how much storage will be required for an HBase cluster.
@@ -493,10 +811,10 @@ false
with a solid understanding of how HBase handles data internally (KeyValue).
KeyValue
- HBase storage will be dominated by KeyValues. See and for
- how HBase stores data internally.
+ HBase storage will be dominated by KeyValues. See and for
+ how HBase stores data internally.
- It is critical to understand that there is a KeyValue instance for every attribute stored in a row, and the
+ It is critical to understand that there is a KeyValue instance for every attribute stored in a row, and the
rowkey-length, ColumnFamily name-length and attribute lengths will drive the size of the database more than any other
factor.
diff --git a/src/docbkx/performance.xml b/src/docbkx/performance.xml
index 3ae843232698..8fb9559bd9f9 100644
--- a/src/docbkx/performance.xml
+++ b/src/docbkx/performance.xml
@@ -26,7 +26,7 @@
* limitations under the License.
*/
-->
- Performance Tuning
+ Apache HBase (TM) Performance Tuning
+ Operating System
@@ -47,7 +47,7 @@
Network
Perhaps the most important factor in avoiding network issues degrading Hadoop and HBase performance is the switching hardware
- that is used, decisions made early in the scope of the project can cause major problems when you double or triple the size of your cluster (or more).
+ that is used; decisions made early in the scope of the project can cause major problems when you double or triple the size of your cluster (or more).
Important items to consider:
@@ -59,15 +59,15 @@
Single Switch
- The single most important factor in this configuration is that the switching capacity of the hardware is capable of
+ The single most important factor in this configuration is that the switching capacity of the hardware is capable of
handling the traffic which can be generated by all systems connected to the switch. Some lower priced commodity hardware
- can have a slower switching capacity than could be utilized by a full switch.
+ can have a slower switching capacity than could be utilized by a full switch.
Multiple Switches
Multiple switches are a potential pitfall in the architecture. The most common configuration of lower priced hardware is a
- simple 1Gbps uplink from one switch to another. This often overlooked pinch point can easily become a bottleneck for cluster communication.
+ simple 1Gbps uplink from one switch to another. This often overlooked pinch point can easily become a bottleneck for cluster communication.
Especially with MapReduce jobs that are both reading and writing a lot of data the communication across this uplink could be saturated.
Mitigation of this issue is fairly simple and can be accomplished in multiple ways:
@@ -85,22 +85,27 @@
Poor switch capacity performance
Insufficient uplink to another rack
- If the the switches in your rack have appropriate switching capacity to handle all the hosts at full speed, the next most likely issue will be caused by homing
+ If the switches in your rack have appropriate switching capacity to handle all the hosts at full speed, the next most likely issue will be caused by homing
more of your cluster across racks. The easiest way to avoid issues when spanning multiple racks is to use port trunking to create a bonded uplink to other racks.
The downside of this method however, is in the overhead of ports that could potentially be used. An example of this is, creating an 8Gbps port channel from rack
- A to rack B, using 8 of your 24 ports to communicate between racks gives you a poor ROI, using too few however can mean you're not getting the most out of your cluster.
+ A to rack B, using 8 of your 24 ports to communicate between racks gives you a poor ROI; using too few, however, can mean you're not getting the most out of your cluster.
Using 10Gbe links between racks will greatly increase performance, and assuming your switches support a 10Gbe uplink or allow for an expansion card will allow you to
save your ports for machines as opposed to uplinks.
-
+
+
+ Network Interfaces
+ Are all the network interfaces functioning correctly? Are you sure? See the Troubleshooting Case Study in .
+
+
Java
- The Garbage Collector and HBase
+ The Garbage Collector and Apache HBase
+ Long GC pauses
@@ -117,13 +122,20 @@
threshold, the more GCing is done, the more CPU used). To address the
second fragmentation issue, Todd added an experimental facility,
MSLAB, that
- must be explicitly enabled in HBase 0.90.x (Its defaulted to be on in
- 0.92.x HBase). See hbase.hregion.memstore.mslab.enabled
+ must be explicitly enabled in Apache HBase 0.90.x (it is on by default in
+ Apache HBase 0.92.x). Set hbase.hregion.memstore.mslab.enabled
to true in your Configuration. See the cited
slides for background and detailThe latest jvms do better
regards fragmentation so make sure you are running a recent release.
Read down in the message,
- Identifying concurrent mode failures caused by fragmentation..
+ Identifying concurrent mode failures caused by fragmentation..
+ Be aware that when enabled, each MemStore instance will occupy at least
+ an MSLAB instance of memory. If you have thousands of regions or lots
+ of regions each with many column families, this allocation of MSLAB
+ may be responsible for a good portion of your heap allocation and in
+ an extreme case cause you to OOME. Disable MSLAB in this case, or
+ lower the amount of memory it uses, or float fewer regions per server.
+
For more information about GC logs, see .
@@ -135,6 +147,7 @@
See .
+
Number of Regions
@@ -153,41 +166,52 @@
hbase.regionserver.handler.count
- See .
+ See .
hfile.block.cache.size
- See .
+ See .
A memory setting for the RegionServer process.
-
+ hbase.regionserver.global.memstore.upperLimit
- See .
+ See .
This memory setting is often adjusted for the RegionServer process depending on needs.
-
+ hbase.regionserver.global.memstore.lowerLimit
- See .
+ See .
This memory setting is often adjusted for the RegionServer process depending on needs.
hbase.hstore.blockingStoreFiles
- See .
+ See .
If there is blocking in the RegionServer logs, increasing this can help.
hbase.hregion.memstore.block.multiplier
- See .
- If there is enough RAM, increasing this can help.
+ See .
+ If there is enough RAM, increasing this can help.
+
+
+
+ hbase.regionserver.checksum.verify
+ Have HBase write the checksum into the datablock and save
+ having to do the checksum seek whenever you read. See the
+ release note on HBASE-5074 support checksums in HBase block cache.
+
+
+
+
ZooKeeper
See for information on configuring ZooKeeper, and see the part
@@ -196,19 +220,19 @@
Schema Design
-
+
Number of Column Families
See .
Key and Attribute Lengths
- See . See also for
+ See . See also for
compression caveats.
Table RegionSize
The regionsize can be set on a per-table basis via setFileSize on
- HTableDescriptor in the
+ HTableDescriptor in the
event where certain tables require different regionsizes than the configured default regionsize.
See for more information.
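+
+ A minimal sketch of doing this from the Java client (the table name and the 10 GB figure are
+ illustrative; in the client API the setter is HTableDescriptor.setMaxFileSize):
+import org.apache.hadoop.hbase.HTableDescriptor;
+
+public class RegionSizeExample {
+  public static void main(String[] args) {
+    HTableDescriptor table = new HTableDescriptor("myTable");
+    // Regions of this table split at ~10 GB instead of the cluster-wide default.
+    table.setMaxFileSize(10L * 1024 * 1024 * 1024);
+    // ... pass 'table' to HBaseAdmin.createTable(...) or modifyTable(...) ...
+  }
+}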
@@ -224,22 +248,23 @@
on each insert. If ROWCOL, the hash of the row +
column family + column family qualifier will be added to the bloom on
each key insert.
- See HColumnDescriptor and
- for more information.
+ See HColumnDescriptor and
+ for more information or this answer up in quora,
+How are bloom filters used in HBase?.
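+
+ A minimal sketch of enabling a bloom filter on a column family from the Java client (table and
+ family names are illustrative; in the 0.9x client the bloom type enum lives in StoreFile, so
+ check your version's HColumnDescriptor javadoc):
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+
+public class BloomExample {
+  public static void main(String[] args) {
+    HTableDescriptor table = new HTableDescriptor("myTable");
+    HColumnDescriptor cf = new HColumnDescriptor("cf");
+    cf.setBloomFilterType(StoreFile.BloomType.ROWCOL); // ROW and NONE are the other choices
+    table.addFamily(cf);
+    // ... pass 'table' to HBaseAdmin.createTable(...) ...
+  }
+}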
ColumnFamily BlockSize
- The blocksize can be configured for each ColumnFamily in a table, and this defaults to 64k. Larger cell values require larger blocksizes.
+ The blocksize can be configured for each ColumnFamily in a table, and this defaults to 64k. Larger cell values require larger blocksizes.
There is an inverse relationship between blocksize and the resulting StoreFile indexes (i.e., if the blocksize is doubled then the resulting
indexes should be roughly halved).
- See HColumnDescriptor
+ See HColumnDescriptor
and for more information.
In-Memory ColumnFamilies
- ColumnFamilies can optionally be defined as in-memory. Data is still persisted to disk, just like any other ColumnFamily.
+ ColumnFamilies can optionally be defined as in-memory. Data is still persisted to disk, just like any other ColumnFamily.
In-memory blocks have the highest priority in the , but it is not a guarantee that the entire table
will be in memory.
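+
+ A minimal sketch of setting both of the per-family options above from the Java client (the
+ family name and block size are illustrative):
+import org.apache.hadoop.hbase.HColumnDescriptor;
+
+public class FamilyTuningExample {
+  public static void main(String[] args) {
+    HColumnDescriptor cf = new HColumnDescriptor("cf");
+    cf.setBlocksize(128 * 1024); // in bytes; the default is 64k
+    cf.setInMemory(true);        // this family's blocks get in-memory cache priority
+    // ... add 'cf' to an HTableDescriptor and create or alter the table ...
+  }
+}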
@@ -251,24 +276,24 @@
Production systems should use compression with their ColumnFamily definitions. See for more information.
However...
- Compression deflates data on disk. When it's in-memory (e.g., in the
+ Compression deflates data on disk. When it's in-memory (e.g., in the
MemStore) or on the wire (e.g., transferring between RegionServer and Client) it's inflated.
So while using ColumnFamily compression is a best practice, it's not going to completely eliminate
- the impact of over-sized Keys, over-sized ColumnFamily names, or over-sized Column names.
+ the impact of over-sized Keys, over-sized ColumnFamily names, or over-sized Column names.
See on for schema design tips, and for more information on HBase stores data internally.
-
+
-
+
Writing to HBase
Batch Loading
Use the bulk load tool if you can. See
- Bulk Loads.
+ .
Otherwise, pay attention to the below.
@@ -278,35 +303,27 @@
Table Creation: Pre-Creating Regions
-Tables in HBase are initially created with one region by default. For bulk imports, this means that all clients will write to the same region until it is large enough to split and become distributed across the cluster. A useful pattern to speed up the bulk import process is to pre-create empty regions. Be somewhat conservative in this, because too-many regions can actually degrade performance. An example of pre-creation using hex-keys is as follows (note: this example may need to be tweaked to the individual applications keys):
+Tables in HBase are initially created with one region by default. For bulk imports, this means that all clients will write to the same region
+until it is large enough to split and become distributed across the cluster. A useful pattern to speed up the bulk import process is to pre-create empty regions.
+ Be somewhat conservative in this, because too-many regions can actually degrade performance.
+ There are two different approaches to pre-creating splits. The first approach is to rely on the default HBaseAdmin strategy
+ (which is implemented in Bytes.split)...
+
+
+byte[] startKey = ...; // your lowest key
+byte[] endKey = ...; // your highest key
+int numberOfRegions = ...; // # of regions to create
+admin.createTable(table, startKey, endKey, numberOfRegions);
+
+ And the other approach is to define the splits yourself...
+
+
+byte[][] splits = ...; // create your own splits
+admin.createTable(table, splits);
+
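+
+ Both calls above assume an HBaseAdmin and an HTableDescriptor are already in hand; a minimal
+ sketch of that surrounding setup (table name, family name, and key range are illustrative):
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class PresplitExample {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = HBaseConfiguration.create();
+    HBaseAdmin admin = new HBaseAdmin(conf);
+    HTableDescriptor table = new HTableDescriptor("myTable");
+    table.addFamily(new HColumnDescriptor("cf"));
+    byte[] startKey = Bytes.toBytes("0000000000000000"); // lowest key you expect to write
+    byte[] endKey = Bytes.toBytes("ffffffffffffffff");   // highest key you expect to write
+    admin.createTable(table, startKey, endKey, 10);      // pre-create 10 regions
+  }
+}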
-public static boolean createTable(HBaseAdmin admin, HTableDescriptor table, byte[][] splits)
-throws IOException {
- try {
- admin.createTable( table, splits );
- return true;
- } catch (TableExistsException e) {
- logger.info("table " + table.getNameAsString() + " already exists");
- // the table already exists...
- return false;
- }
-}
-
-public static byte[][] getHexSplits(String startKey, String endKey, int numRegions) {
- byte[][] splits = new byte[numRegions-1][];
- BigInteger lowestKey = new BigInteger(startKey, 16);
- BigInteger highestKey = new BigInteger(endKey, 16);
- BigInteger range = highestKey.subtract(lowestKey);
- BigInteger regionIncrement = range.divide(BigInteger.valueOf(numRegions));
- lowestKey = lowestKey.add(regionIncrement);
- for(int i=0; i < numRegions-1;i++) {
- BigInteger key = lowestKey.add(regionIncrement.multiply(BigInteger.valueOf(i)));
- byte[] b = String.format("%016x", key).getBytes();
- splits[i] = b;
- }
- return splits;
-}
+ See for issues related to understanding your keyspace and pre-creating regions.
@@ -314,7 +331,7 @@ public static byte[][] getHexSplits(String startKey, String endKey, int numRegio
Table Creation: Deferred Log Flush
-The default behavior for Puts using the Write Ahead Log (WAL) is that HLog edits will be written immediately. If deferred log flush is used,
+The default behavior for Puts using the Write Ahead Log (WAL) is that HLog edits will be written immediately. If deferred log flush is used,
WAL edits are kept in memory until the flush period. The benefit is aggregated and asynchronous HLog writes, but the potential downside is that if
the RegionServer goes down the yet-to-be-flushed edits are lost. This is safer, however, than not using WAL at all with Puts.
@@ -322,7 +339,7 @@ WAL edits are kept in memory until the flush period. The benefit is aggregated
Deferred log flush can be configured on tables via HTableDescriptor. The default value of hbase.regionserver.optionallogflushinterval is 1000ms.
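+ As a hedged sketch (0.92/0.94-era client API; the table name is illustrative), deferred log flush can be enabled
+ on an existing table like so:
+
+HBaseAdmin admin = new HBaseAdmin(conf);
+HTableDescriptor desc = admin.getTableDescriptor(Bytes.toBytes("myTable"));
+desc.setDeferredLogFlush(true);                    // WAL edits are now flushed on the optionallogflushinterval
+admin.disableTable("myTable");                     // the table must be offline to modify its descriptor
+admin.modifyTable(Bytes.toBytes("myTable"), desc);
+admin.enableTable("myTable");
+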
-
+ HBase Client: AutoFlush
@@ -348,25 +365,25 @@ Deferred log flush can be configured on tables via
In general, it is best to use WAL for Puts, and where loading throughput
- is a concern to use bulk loading techniques instead.
+ is a concern to use bulk loading techniques instead.
HBase Client: Group Puts by RegionServer
- In addition to using the writeBuffer, grouping Puts by RegionServer can reduce the number of client RPC calls per writeBuffer flush.
+ In addition to using the writeBuffer, grouping Puts by RegionServer can reduce the number of client RPC calls per writeBuffer flush.
There is a utility HTableUtil currently on TRUNK that does this, but you can either copy that or implement your own version for
those still on 0.90.x or earlier.
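+ The sketch below shows what HTableUtil-style grouping might look like if rolled by hand; the grouping key
+ (hostname:port of the hosting RegionServer, obtained via HTable.getRegionLocation) and the variable names are
+ illustrative, not a definitive implementation.
+
+Map<String, List<Put>> putsByServer = new HashMap<String, List<Put>>();
+for (Put put : puts) {                              // 'puts' is the full batch to be written
+  HRegionLocation location = htable.getRegionLocation(put.getRow());
+  String server = location.getHostname() + ":" + location.getPort();
+  List<Put> group = putsByServer.get(server);
+  if (group == null) {
+    group = new ArrayList<Put>();
+    putsByServer.put(server, group);
+  }
+  group.add(put);
+}
+htable.setAutoFlush(false);                         // let the writeBuffer accumulate each group
+for (List<Put> group : putsByServer.values()) {
+  htable.put(group);
+  htable.flushCommits();                            // one flush (and RPC burst) per RegionServer group
+}
+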
-
+
MapReduce: Skip The ReducerWhen writing a lot of data to an HBase table from a MR job (e.g., with TableOutputFormat), and specifically where Puts are being emitted
- from the Mapper, skip the Reducer step. When a Reducer step is used, all of the output (Puts) from the Mapper will get spooled to disk, then sorted/shuffled to other
- Reducers that will most likely be off-node. It's far more efficient to just write directly to HBase.
+ from the Mapper, skip the Reducer step. When a Reducer step is used, all of the output (Puts) from the Mapper will get spooled to disk, then sorted/shuffled to other
+ Reducers that will most likely be off-node. It's far more efficient to just write directly to HBase.
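+ A minimal map-only job sketch (MyImportMapper and the table name are hypothetical) looks like the following;
+ with no Reducer configured, the Puts emitted by the Mapper go straight to the table instead of being spooled
+ and shuffled.
+
+Job job = new Job(conf, "import-to-hbase");
+job.setJarByClass(MyImportMapper.class);            // hypothetical Mapper that emits Puts
+job.setMapperClass(MyImportMapper.class);
+TableMapReduceUtil.initTableReducerJob("targetTable", null, job);  // null reducer class: write directly to HBase
+job.setNumReduceTasks(0);                           // explicitly skip the reduce phase
+job.waitForCompletion(true);
+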
- For summary jobs where HBase is used as a source and a sink, then writes will be coming from the Reducer step (e.g., summarize values then write out result).
- This is a different processing problem than from the the above case.
+ For summary jobs where HBase is used as both a source and a sink, the writes will come from the Reducer step (e.g., summarize values, then write out the result).
+ This is a different processing problem from the above case.
@@ -375,16 +392,16 @@ Deferred log flush can be configured on tables via If all your data is being written to one region at a time, then re-read the
section on processing timeseries data.
Also, if you are pre-splitting regions and all your data is still winding up in a single region even though
- your keys aren't monotonically increasing, confirm that your keyspace actually works with the split strategy. There are a
+ your keys aren't monotonically increasing, confirm that your keyspace actually works with the split strategy. There are a
variety of reasons that regions may appear "well split" but won't work with your data. As
- the HBase client communicates directly with the RegionServers, this can be obtained via
+ the HBase client communicates directly with the RegionServers, this can be obtained via
HTable.getRegionLocation.
- See , as well as
+ See , as well as
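+ A quick way to sanity-check the split points is to ask the table for its region boundaries and to locate a
+ representative key; this sketch uses illustrative table and row names.
+
+HTable htable = new HTable(conf, "myTable");
+Pair<byte[][], byte[][]> keys = htable.getStartEndKeys();
+for (byte[] startKey : keys.getFirst()) {
+  System.out.println("region start key: " + Bytes.toStringBinary(startKey));
+}
+HRegionLocation location = htable.getRegionLocation(Bytes.toBytes("someRowKey"));
+System.out.println("someRowKey is served by " + location);   // which region/RegionServer hosts this key?
+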
-
+
Reading from HBase
@@ -406,7 +423,7 @@ Deferred log flush can be configured on tables via Scan settings in MapReduce jobs deserve special attention. Timeouts can result (e.g., UnknownScannerException)
in Map tasks if it takes longer to process a batch of records before the client goes back to the RegionServer for the
next set of data. This problem can occur because there is non-trivial processing occurring per row. If you process
- rows quickly, set caching higher. If you process rows more slowly (e.g., lots of transformations per row, writes),
+ rows quickly, set caching higher. If you process rows more slowly (e.g., lots of transformations per row, writes),
then set caching lower.
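+ As a hedged sketch (MyMapper and the table name are hypothetical), scan caching for a TableMapper job can be
+ set on the Scan passed to TableMapReduceUtil:
+
+Scan scan = new Scan();
+scan.setCaching(500);            // rows fetched per RPC; lower this if per-row processing is heavy
+scan.setCacheBlocks(false);      // full-table MapReduce scans should not churn the block cache
+TableMapReduceUtil.initTableMapperJob(
+    "sourceTable",               // illustrative source table
+    scan,
+    MyMapper.class,              // hypothetical TableMapper subclass
+    ImmutableBytesWritable.class,
+    Result.class,
+    job);
+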
Timeouts can also happen in a non-MapReduce use case (i.e., single threaded HBase client doing a Scan), but the
@@ -424,6 +441,27 @@ Deferred log flush can be configured on tables via
+
+ Avoid scan seeks
+ When columns are selected explicitly with scan.addColumn, HBase will schedule seek operations to seek between the
+ selected columns. When rows have few columns and each column has only a few versions this can be inefficient. A seek operation is generally
+ slower if it does not seek at least past 5-10 columns/versions or 512-1024 bytes.
+ In order to opportunistically look ahead a few columns/versions to see if the next column/version can be found that
+ way before a seek operation is scheduled, a new attribute Scan.HINT_LOOKAHEAD can be set on the Scan object. The following code instructs the
+ RegionServer to attempt two iterations of next before a seek is scheduled:
+Scan scan = new Scan();
+scan.addColumn(...);
+scan.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2));
+table.getScanner(scan);
+
+
+
+ MapReduce - Input Splits
+ For MapReduce jobs that use HBase tables as a source, if there is a pattern where the "slow" map tasks seem to
+ have the same Input Split (i.e., the RegionServer serving the data), see the
+ Troubleshooting Case Study in .
+
+ Close ResultScanners
@@ -469,13 +507,103 @@ htable.close();
Concurrency: Monitor Data Spread
- When performing a high number of concurrent reads, monitor the data spread of the target tables. If the target table(s) have
+ When performing a high number of concurrent reads, monitor the data spread of the target tables. If the target table(s) have
too few regions then the reads could likely be served from too few nodes.
- See , as well as
+ See , as well as
-
+
+ Bloom Filters
+ Enabling Bloom Filters can save you having to go to disk and
+ can help improve read latencies.
+ Bloom filters were developed over in HBase-1200
+ Add bloomfilters.
+ For a description of the development process -- why static blooms
+ rather than dynamic -- and for an overview of the unique properties
+ that pertain to blooms in HBase, as well as possible future
+ directions, see the Development Process section
+ of the document BloomFilters
+ in HBase attached to HBase-1200.
+
+ The bloom filters described here are actually version two of
+ blooms in HBase. In versions up to 0.19.x, HBase had a dynamic bloom
+ option based on work done by the European Commission One-Lab
+ Project 034819. The core of the HBase bloom work was later
+ pulled up into Hadoop to implement org.apache.hadoop.io.BloomMapFile.
+ Version 1 of HBase blooms never worked that well. Version 2 is a
+ rewrite from scratch though again it starts with the one-lab
+ work.
+
+ See also .
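+ As a minimal sketch against the 0.92/0.94-era API (table and family names are illustrative), a bloom filter is
+ enabled per ColumnFamily via HColumnDescriptor; the shell exposes a corresponding BLOOMFILTER attribute on the
+ column family.
+
+HColumnDescriptor cf = new HColumnDescriptor("colfam1");
+cf.setBloomFilterType(StoreFile.BloomType.ROW);    // ROWCOL may pay off when Gets name explicit columns
+HTableDescriptor desc = new HTableDescriptor("myTable");
+desc.addFamily(cf);
+admin.createTable(desc);
+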
+
+
+
+ Bloom StoreFile footprint
+
+ Bloom filters add an entry to the StoreFile
+ general FileInfo data structure and then two
+ extra entries to the StoreFile metadata
+ section.
+
+
+ BloomFilter in the StoreFile
+ FileInfo data structure
+
+ FileInfo has a
+ BLOOM_FILTER_TYPE entry which is set to
+ NONE, ROW or
+ ROWCOL.
+
+
+
+ BloomFilter entries in StoreFile
+ metadata
+
+ BLOOM_FILTER_META holds Bloom Size, Hash
+ Function used, etc. It's small in size and is cached on
+ StoreFile.Reader load
+ BLOOM_FILTER_DATA is the actual bloomfilter
+ data. Obtained on-demand. Stored in the LRU cache, if it is enabled
+ (It's enabled by default).
+
+
+
+ Bloom Filter Configuration
+
+ io.hfile.bloom.enabled global kill
+ switch
+
+ io.hfile.bloom.enabled in
+ Configuration serves as the kill switch in case
+ something goes wrong. Default = true.
+
+
+
+ io.hfile.bloom.error.rate
+
+ io.hfile.bloom.error.rate = average false
+ positive rate. Default = 1%. Decrease rate by ½ (e.g. to .5%) == +1
+ bit per bloom entry.
+
+
+
+ io.hfile.bloom.max.fold
+
+ io.hfile.bloom.max.fold = guaranteed minimum
+ fold rate. Most people should leave this alone. Default = 7, or can
+ collapse to at least 1/128th of original size. See the
+ Development Process section of the document BloomFilters
+ in HBase for more on what this option means.
+
+
+
+
-
+
Deleting from HBase
@@ -503,21 +631,54 @@ htable.close();
Current Issues With Low-Latency ReadsThe original use-case for HDFS was batch processing. As such, low-latency reads were historically not a priority.
- With the increased adoption of HBase this is changing, and several improvements are already in development.
- See the
+ With the increased adoption of Apache HBase this is changing, and several improvements are already in development.
+ See the
Umbrella Jira Ticket for HDFS Improvements for HBase.
+
+ Leveraging local data
+Since Hadoop 1.0.0 (also 0.22.1, 0.23.1, CDH3u3 and HDP 1.0) via
+HDFS-2246,
+it is possible for the DFSClient to take a "short circuit" and
+read directly from disk instead of going through the DataNode when the
+data is local. What this means for HBase is that the RegionServers can
+read directly off their machine's disks instead of having to open a
+socket to talk to the DataNode, the former being generally much
+faster. See JD's Performance Talk.
+Also see HBase, mail # dev - read short circuit thread for
+more discussion around short circuit reads.
+
+To enable "short circuit" reads, you must set two configurations.
+First, the hdfs-site.xml needs to be amended. Set
+the property dfs.block.local-path-access.user
+to be the only user that can use the shortcut.
+This has to be the user that started HBase. Then in hbase-site.xml,
+set dfs.client.read.shortcircuit to be true.
+
+
+ For optimal performance when short-circuit reads are enabled, it is recommended that HDFS checksums are disabled.
+ To maintain data integrity with HDFS checksums disabled, HBase can be configured to write its own checksums into
+ its datablocks and verify against these. See .
+
+
+The DataNodes need to be restarted in order to pick up the new
+configuration. Be aware that if a process started under a username
+other than the one configured here also has the shortcircuit
+enabled, it will get an Exception regarding an unauthorized access but
+the data will still be read.
+
+ Performance Comparisons of HBase vs. HDFS
- A fairly common question on the dist-list is why HBase isn't as performant as HDFS files in a batch context (e.g., as
- a MapReduce source or sink). The short answer is that HBase is doing a lot more than HDFS (e.g., reading the KeyValues,
- returning the most current row or specified timestamps, etc.), and as such HBase is 4-5 times slower than HDFS in this
+ A fairly common question on the dist-list is why HBase isn't as performant as HDFS files in a batch context (e.g., as
+ a MapReduce source or sink). The short answer is that HBase is doing a lot more than HDFS (e.g., reading the KeyValues,
+ returning the most current row or specified timestamps, etc.), and as such HBase is 4-5 times slower than HDFS in this
processing context. Not that there isn't room for improvement (and this gap will, over time, be reduced), but HDFS
will always be faster in this use-case.
-
+
Amazon EC2Performance questions are common on Amazon EC2 environments because it is a shared environment. You will
not see the same throughput as a dedicated server. In terms of running tests on EC2, run them several times for the same
@@ -527,4 +688,9 @@ htable.close();
because EC2 issues are practically a separate class of performance issues.
+
+ Case Studies
+ For Performance and Troubleshooting Case Studies, see .
+
+
diff --git a/src/docbkx/preface.xml b/src/docbkx/preface.xml
index 2d9f39d1c678..af54aa29749a 100644
--- a/src/docbkx/preface.xml
+++ b/src/docbkx/preface.xml
@@ -33,7 +33,7 @@
Herein you will find either the definitive documentation on an HBase topic
as of its standing when the referenced HBase version shipped, or it
will point to the location in javadoc,
+ xlink:href="http://hbase.apache.org/apidocs/index.html">javadoc,
JIRA
or wiki where
the pertinent information can be found.
diff --git a/src/docbkx/security.xml b/src/docbkx/security.xml
new file mode 100644
index 000000000000..ed4a0c2ed638
--- /dev/null
+++ b/src/docbkx/security.xml
@@ -0,0 +1,532 @@
+
+
+
+Secure Apache HBase (TM)
+
+ Secure Client Access to Apache HBase
+ Newer releases of Apache HBase (TM) (>= 0.92) support optional SASL authentication of clientsSee
+ also Matteo Bertozzi's article on Understanding User Authentication and Authorization in Apache HBase..
+ This describes how to set up Apache HBase and clients for connection to secure HBase resources.
+
+ Prerequisites
+
+ You need to have a working Kerberos KDC.
+
+
+ A HBase configured for secure client access is expected to be running
+ on top of a secured HDFS cluster. HBase must be able to authenticate
+ to HDFS services. HBase needs Kerberos credentials to interact with
+ the Kerberos-enabled HDFS daemons. Authenticating a service should be
+ done using a keytab file. The procedure for creating keytabs for HBase
+ service is the same as for creating keytabs for Hadoop. Those steps
+ are omitted here. Copy the resulting keytab files to wherever HBase
+ Master and RegionServer processes are deployed and make them readable
+ only to the user account under which the HBase daemons will run.
+
+
+ A Kerberos principal has three parts, with the form
+ username/fully.qualified.domain.name@YOUR-REALM.COM. We
+ recommend using hbase as the username portion.
+
+
+ The following is an example of the configuration properties for
+ Kerberos operation that must be added to the
+ hbase-site.xml file on every server machine in the
+ cluster. These properties are required for even the most basic interactions with a
+ secure Hadoop configuration, independent of HBase security.
+
+
+ hbase.regionserver.kerberos.principal
+ hbase/_HOST@YOUR-REALM.COM
+
+
+ hbase.regionserver.keytab.file
+ /etc/hbase/conf/keytab.krb5
+
+
+ hbase.master.kerberos.principal
+ hbase/_HOST@YOUR-REALM.COM
+
+
+ hbase.master.keytab.file
+ /etc/hbase/conf/keytab.krb5
+
+ ]]>
+
+ Each HBase client user should also be given a Kerberos principal. This
+ principal should have a password assigned to it (as opposed to a
+ keytab file). The client principal's maxrenewlife should
+ be set so that it can be renewed enough times for the HBase client
+ process to complete. For example, if a user runs a long-running HBase
+ client process that takes at most 3 days, we might create this user's
+ principal within kadmin with: addprinc -maxrenewlife
+ 3days
+
+
+ Long running daemons with indefinite lifetimes that require client
+ access to HBase can instead be configured to log in from a keytab. For
+ each host running such daemons, create a keytab with
+ kadmin or kadmin.local. The procedure for
+ creating keytabs for HBase service is the same as for creating
+ keytabs for Hadoop. Those steps are omitted here. Copy the resulting
+ keytab files to where the client daemon will execute and make them
+ readable only to the user account under which the daemon will run.
+
+
+
+ Server-side Configuration for Secure Operation
+
+ Add the following to the hbase-site.xml file on every server machine in the cluster:
+
+
+ hbase.security.authentication
+ kerberos
+
+
+ hbase.security.authorization
+ true
+
+
+ hbase.coprocessor.region.classes
+ org.apache.hadoop.hbase.security.token.TokenProvider
+
+ ]]>
+
+ A full shutdown and restart of HBase service is required when deploying
+ these configuration changes.
+
+
+
+ Client-side Configuration for Secure Operation
+
+ Add the following to the hbase-site.xml file on every client:
+
+
+ hbase.security.authentication
+ kerberos
+
+ ]]>
+
+ The client environment must be logged in to Kerberos from KDC or
+ keytab via the kinit command before communication with
+ the HBase cluster will be possible.
+
+
+ Be advised that if the hbase.security.authentication
+ in the client- and server-side site files do not match, the client will
+ not be able to communicate with the cluster.
+
+
+ Once HBase is configured for secure RPC it is possible to optionally
+ configure encrypted communication. To do so, add the following to the
+ hbase-site.xml file on every client:
+
+
+ hbase.rpc.protection
+ privacy
+
+ ]]>
+
+ This configuration property can also be set on a per connection basis.
+ Set it in the Configuration supplied to
+ HTable:
+
+
+ Configuration conf = HBaseConfiguration.create();
+ conf.set("hbase.rpc.protection", "privacy");
+ HTable table = new HTable(conf, tablename);
+
+
+ Expect a ~10% performance penalty for encrypted communication.
+
+
+
+ Client-side Configuration for Secure Operation - Thrift Gateway
+
+ Add the following to the hbase-site.xml file for every Thrift gateway:
+
+ hbase.thrift.keytab.file
+ /etc/hbase/conf/hbase.keytab
+
+
+ hbase.thrift.kerberos.principal
+ $USER/_HOST@HADOOP.LOCALDOMAIN
+
+ ]]>
+
+
+ Substitute the appropriate credential and keytab for $USER and $KEYTAB
+ respectively.
+
+
+ The Thrift gateway will authenticate with HBase using the supplied
+ credential. No authentication will be performed by the Thrift gateway
+ itself. All client access via the Thrift gateway will use the Thrift
+ gateway's credential and have its privilege.
+
+
+
+ Client-side Configuration for Secure Operation - REST Gateway
+
+ Add the following to the hbase-site.xml file for every REST gateway:
+
+ hbase.rest.keytab.file
+ $KEYTAB
+
+
+ hbase.rest.kerberos.principal
+ $USER/_HOST@HADOOP.LOCALDOMAIN
+
+ ]]>
+
+
+ Substitute the appropriate credential and keytab for $USER and $KEYTAB
+ respectively.
+
+
+ The REST gateway will authenticate with HBase using the supplied
+ credential. No authentication will be performed by the REST gateway
+ itself. All client access via the REST gateway will use the REST
+ gateway's credential and have its privilege.
+
+
+ It should be possible for clients to authenticate with the HBase
+ cluster through the REST gateway in a pass-through manner via SPNEGO
+ HTTP authentication. This is future work.
+
+
+
+
+
+
+
+ Access Control
+
+ Newer releases of Apache HBase (>= 0.92) support optional access control
+ list (ACL-) based protection of resources on a column family and/or
+ table basis.
+
+
+ This describes how to set up Secure HBase for access control, with an
+ example of granting and revoking user permission on table resources
+ provided.
+
+
+ Prerequisites
+
+ You must configure HBase for secure operation. Refer to the section
+ "Secure Client Access to HBase" and complete all of the steps described
+ there.
+
+
+ You must also configure ZooKeeper for secure operation. Changes to ACLs
+ are synchronized throughout the cluster using ZooKeeper. Secure
+ authentication to ZooKeeper must be enabled, otherwise it will be
+ possible to subvert HBase access control via direct client access to
+ ZooKeeper. Refer to the section on secure ZooKeeper configuration and
+ complete all of the steps described there.
+
+
+
+ Overview
+
+ With Secure RPC and Access Control enabled, client access to HBase is
+ authenticated and user data is private unless access has been
+ explicitly granted. Access to data can be granted at a table or per
+ column family basis.
+
+
+ However, the following items have been left out of the initial
+ implementation for simplicity:
+
+
+
+ Row-level or per value (cell): This would require broader changes for storing the ACLs inline with rows. It is a future goal.
+
+
+ Push down of file ownership to HDFS: HBase is not designed for the case where files may have different permissions than the HBase system principal. Pushing file ownership down into HDFS would necessitate changes to core code. Also, while HDFS file ownership would make applying quotas easy, and possibly make bulk imports more straightforward, it is not clear that it would offer a more secure setup.
+
+
+ HBase managed "roles" as collections of permissions: We will not model "roles" internally in HBase to begin with. We instead allow group names to be granted permissions, which allows external modeling of roles via group membership. Groups are created and manipulated externally to HBase, via the Hadoop group mapping service.
+
+
+
+Access control mechanisms are mature and fairly standardized in the relational database world. The HBase implementation approximates current convention, but HBase has a simpler feature set than relational databases, especially in terms of client operations. We don't distinguish between an insert (new record) and update (of existing record), for example, as both collapse down into a Put. Accordingly, the important operations condense to four permissions: READ, WRITE, CREATE, and ADMIN.
+
+
+
+ Permissions can be granted in any of the following scopes, though
+ CREATE and ADMIN permissions are effective only at table scope.
+
+
+
+
+ Table
+
+
+ Read: User can read from any column family in table
+ Write: User can write to any column family in table
+ Create: User can alter table attributes; add, alter, or drop column families; and drop the table.
+ Admin: User can alter table attributes; add, alter, or drop column families; and enable, disable, or drop the table. User can also trigger region (re)assignments or relocation.
+
+
+
+
+ Column Family
+
+
+ Read: User can read from the column family
+ Write: User can write to the column family
+
+
+
+
+
+
+ There is also an implicit global scope for the superuser.
+
+
+ The superuser is a principal, specified in the HBase site configuration
+ file, that has equivalent access to HBase as the 'root' user would on a
+ UNIX derived system. Normally this is the principal that the HBase
+ processes themselves authenticate as. Although future versions of HBase
+ Access Control may support multiple superusers, the superuser privilege
+ will always include the principal used to run the HMaster process. Only
+ the superuser is allowed to create tables, switch the balancer on or
+ off, or take other actions with global consequence. Furthermore, the
+ superuser has an implicit grant of all permissions to all resources.
+
+
+ Tables have a new metadata attribute: OWNER, the user principal who owns
+ the table. By default this will be set to the user principal who creates
+ the table, though it may be changed at table creation time or during an
+ alter operation by setting or changing the OWNER table attribute. Only a
+ single user principal can own a table at a given time. A table owner will
+ have all permissions over a given table.
+
+
+
+ Server-side Configuration for Access Control
+
+ Enable the AccessController coprocessor in the cluster configuration
+ and restart HBase. The restart can be a rolling one. Complete the
+ restart of all Master and RegionServer processes before setting up
+ ACLs.
+
+
+ To enable the AccessController, modify the hbase-site.xml file on every server machine in the cluster to look like:
+
+
+ hbase.coprocessor.master.classes
+ org.apache.hadoop.hbase.security.access.AccessController
+
+
+ hbase.coprocessor.region.classes
+ org.apache.hadoop.hbase.security.token.TokenProvider,
+ org.apache.hadoop.hbase.security.access.AccessController
+
+ ]]>
+
+
+ Shell Enhancements for Access Control
+
+The HBase shell has been extended to provide simple commands for editing and updating user permissions. The following commands have been added for access control list management:
+
+ Grant
+
+
+ grant <user> <permissions> <table> [ <column family> [ <column qualifier> ] ]
+
+
+
+ <permissions> is zero or more letters from the set "RWCA": READ('R'), WRITE('W'), CREATE('C'), ADMIN('A').
+
+
+ Note: Grants and revocations of individual permissions on a resource are both accomplished using the grant command. A separate revoke command is also provided by the shell, but this is for fast revocation of all of a user's access rights to a given resource only.
+
+
+ Revoke
+
+
+
+ revoke <user> <table> [ <column family> [ <column qualifier> ] ]
+
+
+
+ Alter
+
+
+ The alter command has been extended to allow ownership assignment:
+
+ alter 'tablename', {OWNER => 'username'}
+
+
+
+ User Permission
+
+
+ The user_permission command shows all access permissions for the current user for a given table:
+
+ user_permission <table>
+
+
+
+
+
+
+
+ Secure Bulk Load
+
+ Bulk loading in secure mode is a bit more involved than normal setup, since the client has to transfer the ownership of the files generated from the mapreduce job to HBase. Secure bulk loading is implemented by a coprocessor, named SecureBulkLoadEndpoint. SecureBulkLoadEndpoint uses a staging directory "hbase.bulkload.staging.dir", which defaults to /tmp/hbase-staging/. The algorithm is as follows.
+
+ Create an hbase owned staging directory which is world traversable (-rwx--x--x, 711) /tmp/hbase-staging.
+ A user writes out data to his secure output directory: /user/foo/data
+ A call is made to hbase to create a secret staging directory
+ which is globally readable/writable (-rwxrwxrwx, 777): /tmp/hbase-staging/averylongandrandomdirectoryname
+ The user makes the data world readable and writable, then moves it
+ into the random staging directory, then calls bulkLoadHFiles()
+
+
+
+ Like delegation tokens, the strength of the security lies in the length
+ and randomness of the secret directory.
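+ On the client side the load itself is still driven by LoadIncrementalHFiles (the completebulkload tool); a
+ minimal sketch, with illustrative paths and table name, looks like:
+
+Configuration conf = HBaseConfiguration.create();
+LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
+// With SecureBulkLoadEndpoint installed, the files travel through the secret staging directory described above.
+loader.doBulkLoad(new Path("/user/foo/data"), new HTable(conf, "myTable"));
+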
+
+
+
+ For secure bulk load to work properly, you have to enable it by adding the SecureBulkLoadEndpoint class to the list of RegionServer coprocessors. To do so, modify the hbase-site.xml file on every server machine in the cluster:
+
+
+ hbase.bulkload.staging.dir
+ /tmp/hbase-staging
+
+
+ hbase.coprocessor.region.classes
+ org.apache.hadoop.hbase.security.token.TokenProvider,
+ org.apache.hadoop.hbase.security.access.AccessController,org.apache.hadoop.hbase.security.access.SecureBulkLoadEndpoint
+
+ ]]>
+
+
diff --git a/src/docbkx/shell.xml b/src/docbkx/shell.xml
index 4fbab08d2236..2a1535336189 100644
--- a/src/docbkx/shell.xml
+++ b/src/docbkx/shell.xml
@@ -26,13 +26,13 @@
* limitations under the License.
*/
-->
- The HBase Shell
+ The Apache HBase Shell
- The HBase Shell is (J)Ruby's
+ The Apache HBase (TM) Shell is (J)Ruby's
IRB with some HBase particular commands added. Anything you can do in
IRB, you should be able to do in the HBase Shell.
- To run the HBase shell,
+ To run the HBase shell,
do as follows:
$ ./bin/hbase shell
@@ -47,7 +47,7 @@
for example basic shell operation.Scripting
- For examples scripting HBase, look in the
+ For examples scripting Apache HBase, look in the
HBase bin directory. Look at the files
that end in *.rb. To run one of these
files, do as follows:
@@ -104,5 +104,16 @@
+ Commands
+ count
+ The count command returns the number of rows in a table.
+ It's quite fast when configured with the right CACHE:
+ hbase> count '<tablename>', CACHE => 1000
+ The above count fetches 1000 rows at a time. Set CACHE lower if your rows are big.
+ Default is to fetch one row at a time.
+
+
+
+
diff --git a/src/docbkx/troubleshooting.xml b/src/docbkx/troubleshooting.xml
index a92d9794e925..5967b03a3d65 100644
--- a/src/docbkx/troubleshooting.xml
+++ b/src/docbkx/troubleshooting.xml
@@ -26,7 +26,7 @@
* limitations under the License.
*/
-->
- Troubleshooting and Debugging HBase
+ Troubleshooting and Debugging Apache HBase (TM)General Guidelines
@@ -37,7 +37,7 @@
should return some hits for those exceptions you’re seeing.
- An error rarely comes alone in HBase, usually when something gets screwed up what will
+ An error rarely comes alone in Apache HBase (TM); usually when something gets screwed up, what will
follow may be hundreds of exceptions and stack traces coming from all over the place.
The best way to approach this type of problem is to walk the log up to where it all
began, for example one trick with RegionServers is that they will print some
@@ -54,7 +54,7 @@
prolonged garbage collection pauses that last longer than the default ZooKeeper session timeout.
For more information on GC pauses, see the
3 part blog post by Todd Lipcon
- and above.
+ and above.
@@ -72,7 +72,7 @@
JobTracker: $HADOOP_HOME/logs/hadoop-<user>-jobtracker-<hostname>.log
- TaskTracker: $HADOOP_HOME/logs/hadoop-<user>-jobtracker-<hostname>.log
+ TaskTracker: $HADOOP_HOME/logs/hadoop-<user>-tasktracker-<hostname>.log
HMaster: $HBASE_HOME/logs/hbase-<user>-master-<hostname>.log
@@ -91,7 +91,7 @@
NameNodeThe NameNode log is on the NameNode server. The HBase Master is typically run on the NameNode server, and well as ZooKeeper.For smaller clusters the JobTracker is typically run on the NameNode server as well.
-
+
DataNodeEach DataNode server will have a DataNode log for HDFS, as well as a RegionServer log for HBase.
@@ -105,32 +105,32 @@
insight on timings at the server. Once enabled, the amount of log
spewed is voluminous. It is not recommended that you leave this
logging on for more than short bursts of time. To enable RPC-level
- logging, browse to the RegionServer UI and click on
+ logging, browse to the RegionServer UI and click on
Log Level. Set the log level to DEBUG for the package
org.apache.hadoop.ipc (That's right, for
hadoop.ipc, NOT hbase.ipc). Then tail the RegionServer's log. Analyze.
To disable, set the logging level back to INFO level.
-
-
+
+
JVM Garbage Collection Logs
- HBase is memory intensive, and using the default GC you can see long pauses in all threads including the Juliet Pause aka "GC of Death".
- To help debug this or confirm this is happening GC logging can be turned on in the Java virtual machine.
+ HBase is memory intensive, and using the default GC you can see long pauses in all threads including the Juliet Pause aka "GC of Death".
+ To help debug this or confirm this is happening GC logging can be turned on in the Java virtual machine.
To enable, in hbase-env.sh add:
-
+
export HBASE_OPTS="-XX:+UseConcMarkSweepGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/home/hadoop/hbase/logs/gc-hbase.log"
- Adjust the log directory to wherever you log. Note: The GC log does NOT roll automatically, so you'll have to keep an eye on it so it doesn't fill up the disk.
+ Adjust the log directory to wherever you log. Note: The GC log does NOT roll automatically, so you'll have to keep an eye on it so it doesn't fill up the disk.
At this point you should see logs like so:
-64898.952: [GC [1 CMS-initial-mark: 2811538K(3055704K)] 2812179K(3061272K), 0.0007360 secs] [Times: user=0.00 sys=0.00, real=0.00 secs]
+64898.952: [GC [1 CMS-initial-mark: 2811538K(3055704K)] 2812179K(3061272K), 0.0007360 secs] [Times: user=0.00 sys=0.00, real=0.00 secs]
64898.953: [CMS-concurrent-mark-start]
-64898.971: [GC 64898.971: [ParNew: 5567K->576K(5568K), 0.0101110 secs] 2817105K->2812715K(3061272K), 0.0102200 secs] [Times: user=0.07 sys=0.00, real=0.01 secs]
+64898.971: [GC 64898.971: [ParNew: 5567K->576K(5568K), 0.0101110 secs] 2817105K->2812715K(3061272K), 0.0102200 secs] [Times: user=0.07 sys=0.00, real=0.01 secs]
@@ -139,20 +139,20 @@ export HBASE_OPTS="-XX:+UseConcMarkSweepGC -verbose:gc -XX:+PrintGCDetails -XX:+
The third line indicates a "minor GC", which pauses the VM for 0.0101110 seconds - aka 10 milliseconds. It has reduced the "ParNew" from about 5.5m to 576k.
Later on in this cycle we see:
-
-64901.445: [CMS-concurrent-mark: 1.542/2.492 secs] [Times: user=10.49 sys=0.33, real=2.49 secs]
+
+64901.445: [CMS-concurrent-mark: 1.542/2.492 secs] [Times: user=10.49 sys=0.33, real=2.49 secs]
64901.445: [CMS-concurrent-preclean-start]
-64901.453: [GC 64901.453: [ParNew: 5505K->573K(5568K), 0.0062440 secs] 2868746K->2864292K(3061272K), 0.0063360 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
-64901.476: [GC 64901.476: [ParNew: 5563K->575K(5568K), 0.0072510 secs] 2869283K->2864837K(3061272K), 0.0073320 secs] [Times: user=0.05 sys=0.01, real=0.01 secs]
-64901.500: [GC 64901.500: [ParNew: 5517K->573K(5568K), 0.0120390 secs] 2869780K->2865267K(3061272K), 0.0121150 secs] [Times: user=0.09 sys=0.00, real=0.01 secs]
-64901.529: [GC 64901.529: [ParNew: 5507K->569K(5568K), 0.0086240 secs] 2870200K->2865742K(3061272K), 0.0087180 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
-64901.554: [GC 64901.555: [ParNew: 5516K->575K(5568K), 0.0107130 secs] 2870689K->2866291K(3061272K), 0.0107820 secs] [Times: user=0.06 sys=0.00, real=0.01 secs]
-64901.578: [CMS-concurrent-preclean: 0.070/0.133 secs] [Times: user=0.48 sys=0.01, real=0.14 secs]
+64901.453: [GC 64901.453: [ParNew: 5505K->573K(5568K), 0.0062440 secs] 2868746K->2864292K(3061272K), 0.0063360 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
+64901.476: [GC 64901.476: [ParNew: 5563K->575K(5568K), 0.0072510 secs] 2869283K->2864837K(3061272K), 0.0073320 secs] [Times: user=0.05 sys=0.01, real=0.01 secs]
+64901.500: [GC 64901.500: [ParNew: 5517K->573K(5568K), 0.0120390 secs] 2869780K->2865267K(3061272K), 0.0121150 secs] [Times: user=0.09 sys=0.00, real=0.01 secs]
+64901.529: [GC 64901.529: [ParNew: 5507K->569K(5568K), 0.0086240 secs] 2870200K->2865742K(3061272K), 0.0087180 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
+64901.554: [GC 64901.555: [ParNew: 5516K->575K(5568K), 0.0107130 secs] 2870689K->2866291K(3061272K), 0.0107820 secs] [Times: user=0.06 sys=0.00, real=0.01 secs]
+64901.578: [CMS-concurrent-preclean: 0.070/0.133 secs] [Times: user=0.48 sys=0.01, real=0.14 secs]
64901.578: [CMS-concurrent-abortable-preclean-start]
-64901.584: [GC 64901.584: [ParNew: 5504K->571K(5568K), 0.0087270 secs] 2871220K->2866830K(3061272K), 0.0088220 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
-64901.609: [GC 64901.609: [ParNew: 5512K->569K(5568K), 0.0063370 secs] 2871771K->2867322K(3061272K), 0.0064230 secs] [Times: user=0.06 sys=0.00, real=0.01 secs]
-64901.615: [CMS-concurrent-abortable-preclean: 0.007/0.037 secs] [Times: user=0.13 sys=0.00, real=0.03 secs]
-64901.616: [GC[YG occupancy: 645 K (5568 K)]64901.616: [Rescan (parallel) , 0.0020210 secs]64901.618: [weak refs processing, 0.0027950 secs] [1 CMS-remark: 2866753K(3055704K)] 2867399K(3061272K), 0.0049380 secs] [Times: user=0.00 sys=0.01, real=0.01 secs]
+64901.584: [GC 64901.584: [ParNew: 5504K->571K(5568K), 0.0087270 secs] 2871220K->2866830K(3061272K), 0.0088220 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
+64901.609: [GC 64901.609: [ParNew: 5512K->569K(5568K), 0.0063370 secs] 2871771K->2867322K(3061272K), 0.0064230 secs] [Times: user=0.06 sys=0.00, real=0.01 secs]
+64901.615: [CMS-concurrent-abortable-preclean: 0.007/0.037 secs] [Times: user=0.13 sys=0.00, real=0.03 secs]
+64901.616: [GC[YG occupancy: 645 K (5568 K)]64901.616: [Rescan (parallel) , 0.0020210 secs]64901.618: [weak refs processing, 0.0027950 secs] [1 CMS-remark: 2866753K(3055704K)] 2867399K(3061272K), 0.0049380 secs] [Times: user=0.00 sys=0.01, real=0.01 secs]
64901.621: [CMS-concurrent-sweep-start]
@@ -161,20 +161,20 @@ export HBASE_OPTS="-XX:+UseConcMarkSweepGC -verbose:gc -XX:+PrintGCDetails -XX:+
There are a few more minor GCs, then there is a pause at the 2nd last line:
-
-64901.616: [GC[YG occupancy: 645 K (5568 K)]64901.616: [Rescan (parallel) , 0.0020210 secs]64901.618: [weak refs processing, 0.0027950 secs] [1 CMS-remark: 2866753K(3055704K)] 2867399K(3061272K), 0.0049380 secs] [Times: user=0.00 sys=0.01, real=0.01 secs]
+
+64901.616: [GC[YG occupancy: 645 K (5568 K)]64901.616: [Rescan (parallel) , 0.0020210 secs]64901.618: [weak refs processing, 0.0027950 secs] [1 CMS-remark: 2866753K(3055704K)] 2867399K(3061272K), 0.0049380 secs] [Times: user=0.00 sys=0.01, real=0.01 secs]
- The pause here is 0.0049380 seconds (aka 4.9 milliseconds) to 'remark' the heap.
+ The pause here is 0.0049380 seconds (aka 4.9 milliseconds) to 'remark' the heap.
At this point the sweep starts, and you can watch the heap size go down:
-64901.637: [GC 64901.637: [ParNew: 5501K->569K(5568K), 0.0097350 secs] 2871958K->2867441K(3061272K), 0.0098370 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
+64901.637: [GC 64901.637: [ParNew: 5501K->569K(5568K), 0.0097350 secs] 2871958K->2867441K(3061272K), 0.0098370 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
... lines removed ...
-64904.936: [GC 64904.936: [ParNew: 5532K->568K(5568K), 0.0070720 secs] 1365024K->1360689K(3061272K), 0.0071930 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
-64904.953: [CMS-concurrent-sweep: 2.030/3.332 secs] [Times: user=9.57 sys=0.26, real=3.33 secs]
+64904.936: [GC 64904.936: [ParNew: 5532K->568K(5568K), 0.0070720 secs] 1365024K->1360689K(3061272K), 0.0071930 secs] [Times: user=0.05 sys=0.00, real=0.01 secs]
+64904.953: [CMS-concurrent-sweep: 2.030/3.332 secs] [Times: user=9.57 sys=0.26, real=3.33 secs]
At this point, the CMS sweep took 3.332 seconds, and heap went from about ~ 2.8 GB to 1.3 GB (approximate).
@@ -186,14 +186,14 @@ export HBASE_OPTS="-XX:+UseConcMarkSweepGC -verbose:gc -XX:+PrintGCDetails -XX:+
Add this to HBASE_OPTS:
-
+
export HBASE_OPTS="-XX:NewSize=64m -XX:MaxNewSize=64m <cms options from above> <gc logging options from above>"
For more information on GC pauses, see the 3 part blog post by Todd Lipcon
and above.
-
+
@@ -201,18 +201,18 @@ export HBASE_OPTS="-XX:NewSize=64m -XX:MaxNewSize=64m <cms options from above
search-hadoop.com
- search-hadoop.com indexes all the mailing lists and is great for historical searches.
+ search-hadoop.com indexes all the mailing lists and is great for historical searches.
Search here first when you have an issue, as it's more than likely someone has already had your problem.
Mailing Lists
- Ask a question on the HBase mailing lists.
- The 'dev' mailing list is aimed at the community of developers actually building HBase and for features currently under development, and 'user'
- is generally used for questions on released versions of HBase. Before going to the mailing list, make sure your
+ Ask a question on the Apache HBase mailing lists.
+ The 'dev' mailing list is aimed at the community of developers actually building Apache HBase and for features currently under development, and 'user'
+ is generally used for questions on released versions of Apache HBase. Before going to the mailing list, make sure your
question has not already been answered by searching the mailing list archives first. Use
.
- Take some time crafting your questionSee Getting Answers; a quality question that includes all context and
+ Take some time crafting your questionSee Getting Answers; a quality question that includes all context and
exhibits evidence the author has tried to find answers in the manual and out on lists
is more likely to get a prompt response.
@@ -236,7 +236,7 @@ export HBASE_OPTS="-XX:NewSize=64m -XX:MaxNewSize=64m <cms options from above
Master Web InterfaceThe Master starts a web-interface on port 60010 by default.
- The Master web UI lists created tables and their definition (e.g., ColumnFamilies, blocksize, etc.). Additionally,
+ The Master web UI lists created tables and their definition (e.g., ColumnFamilies, blocksize, etc.). Additionally,
the available RegionServers in the cluster are listed along with selected high-level metrics (requests, number of regions, usedHeap, maxHeap).
The Master web UI allows navigation to each RegionServer's web UI.
@@ -263,13 +263,13 @@ export HBASE_OPTS="-XX:NewSize=64m -XX:MaxNewSize=64m <cms options from above
ls path [watch]
set path data [version]
delquota [-n|-b] path
- quit
+ quit
printwatches on|off
create [-s] [-e] path data acl
stat path [watch]
- close
+ close
ls2 path [watch]
- history
+ history
listquota path
setAcl path acl
getAcl path
@@ -292,7 +292,7 @@ export HBASE_OPTS="-XX:NewSize=64m -XX:MaxNewSize=64m <cms options from above
top
-
+ top is probably one of the most important tools when first trying to see what’s running on a machine and how the resources are consumed. Here’s an example from a production system:
top - 14:46:59 up 39 days, 11:55, 1 user, load average: 3.75, 3.57, 3.84
@@ -300,10 +300,10 @@ Tasks: 309 total, 1 running, 308 sleeping, 0 stopped, 0 zombie
Cpu(s): 4.5%us, 1.6%sy, 0.0%ni, 91.7%id, 1.4%wa, 0.1%hi, 0.6%si, 0.0%st
Mem: 24414432k total, 24296956k used, 117476k free, 7196k buffers
Swap: 16008732k total, 14348k used, 15994384k free, 11106908k cached
-
- PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
-15558 hadoop 18 -2 3292m 2.4g 3556 S 79 10.4 6523:52 java
-13268 hadoop 18 -2 8967m 8.2g 4104 S 21 35.1 5170:30 java
+
+ PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
+15558 hadoop 18 -2 3292m 2.4g 3556 S 79 10.4 6523:52 java
+13268 hadoop 18 -2 8967m 8.2g 4104 S 21 35.1 5170:30 java
8895 hadoop 18 -2 1581m 497m 3420 S 11 2.1 4002:32 java
…
@@ -351,7 +351,7 @@ hadoop@sv4borg12:~$ jps
hadoop@sv4borg12:~$ ps aux | grep HRegionServer
hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/jdk1.6.0_14/bin/java -Xmx8000m -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -XX:+UseConcMarkSweepGC -XX:NewSize=64m -XX:MaxNewSize=64m -XX:CMSInitiatingOccupancyFraction=88 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/export1/hadoop/logs/gc-hbase.log -Dcom.sun.management.jmxremote.port=10102 -Dcom.sun.management.jmxremote.authenticate=true -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.password.file=/home/hadoop/hbase/conf/jmxremote.password -Dcom.sun.management.jmxremote -Dhbase.log.dir=/export1/hadoop/logs -Dhbase.log.file=hbase-hadoop-regionserver-sv4borg12.log -Dhbase.home.dir=/home/hadoop/hbase -Dhbase.id.str=hadoop -Dhbase.root.logger=INFO,DRFA -Djava.library.path=/home/hadoop/hbase/lib/native/Linux-amd64-64 -classpath /home/hadoop/hbase/bin/../conf:[many jars]:/home/hadoop/hadoop/conf org.apache.hadoop.hbase.regionserver.HRegionServer start
-
+
@@ -371,7 +371,7 @@ hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/j
at java.util.concurrent.LinkedBlockingQueue.poll(LinkedBlockingQueue.java:395)
at org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:647)
at java.lang.Thread.run(Thread.java:619)
-
+
The MemStore flusher thread that is currently flushing to a file:
"regionserver60020.cacheFlusher" daemon prio=10 tid=0x0000000040f4e000 nid=0x45eb in Object.wait() [0x00007f16b5b86000..0x00007f16b5b87af0]
java.lang.Thread.State: WAITING (on object monitor)
@@ -444,7 +444,7 @@ hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/j
A thread that receives data from HDFS:
-
+
"IPC Client (47) connection to sv4borg9/10.4.24.40:9000 from hadoop" daemon prio=10 tid=0x00007f16a02d0000 nid=0x4fa3 runnable [0x00007f16b517d000..0x00007f16b517dbf0]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
@@ -498,63 +498,75 @@ hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/j
OpenTSDB
- OpenTSDB is an excellent alternative to Ganglia as it uses HBase to store all the time series and doesn’t have to downsample. Monitoring your own HBase cluster that hosts OpenTSDB is a good exercise.
+ OpenTSDB is an excellent alternative to Ganglia as it uses Apache HBase to store all the time series and doesn’t have to downsample. Monitoring your own HBase cluster that hosts OpenTSDB is a good exercise.
Here’s an example of a cluster that’s suffering from hundreds of compactions launched almost all around the same time, which severely affects the IO performance: (TODO: insert graph plotting compactionQueueSize)
- It’s a good practice to build dashboards with all the important graphs per machine and per cluster so that debugging issues can be done with a single quick look. For example, at StumbleUpon there’s one dashboard per cluster with the most important metrics from both the OS and HBase. You can then go down at the machine level and get even more detailed metrics.
+ It’s a good practice to build dashboards with all the important graphs per machine and per cluster so that debugging issues can be done with a single quick look. For example, at StumbleUpon there’s one dashboard per cluster with the most important metrics from both the OS and Apache HBase. You can then go down at the machine level and get even more detailed metrics.
clusterssh+top
-
- clusterssh+top, it’s like a poor man’s monitoring system and it can be quite useful when you have only a few machines as it’s very easy to setup. Starting clusterssh will give you one terminal per machine and another terminal in which whatever you type will be retyped in every window. This means that you can type “top” once and it will start it for all of your machines at the same time giving you full view of the current state of your cluster. You can also tail all the logs at the same time, edit files, etc.
+
+ clusterssh+top, it’s like a poor man’s monitoring system and it can be quite useful when you have only a few machines as it’s very easy to set up. Starting clusterssh will give you one terminal per machine and another terminal in which whatever you type will be retyped in every window. This means that you can type “top” once and it will start it for all of your machines at the same time giving you a full view of the current state of your cluster. You can also tail all the logs at the same time, edit files, etc.
-
+
Client
- For more information on the HBase client, see .
+ For more information on the HBase client, see .
ScannerTimeoutException or UnknownScannerException
- This is thrown if the time between RPC calls from the client to RegionServer exceeds the scan timeout.
+ This is thrown if the time between RPC calls from the client to RegionServer exceeds the scan timeout.
For example, if Scan.setCaching is set to 500, then there will be an RPC call to fetch the next batch of rows every 500 .next() calls on the ResultScanner
because data is being transferred in blocks of 500 rows to the client. Reducing the setCaching value may be an option, but setting this value too low makes for inefficient
processing of large numbers of rows.
See .
-
+
+
+ LeaseException when calling Scanner.next
+
+In some situations clients that fetch data from a RegionServer get a LeaseException instead of the usual
+. Usually the source of the exception is
+org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:230) (line number may vary).
+It tends to happen in the context of a slow/freezing RegionServer#next call.
+It can be prevented by having hbase.rpc.timeout > hbase.regionserver.lease.period.
+Harsh J investigated the issue as part of the mailing list thread
+HBase, mail # user - Lease does not exist exceptions
+
Shell or client application throws lots of scary exceptions during normal operationSince 0.20.0 the default log level for org.apache.hadoop.hbase.* is DEBUG.
- On your clients, edit $HBASE_HOME/conf/log4j.properties and change this: log4j.logger.org.apache.hadoop.hbase=DEBUG to this: log4j.logger.org.apache.hadoop.hbase=INFO, or even log4j.logger.org.apache.hadoop.hbase=WARN.
+ On your clients, edit $HBASE_HOME/conf/log4j.properties and change this: log4j.logger.org.apache.hadoop.hbase=DEBUG to this: log4j.logger.org.apache.hadoop.hbase=INFO, or even log4j.logger.org.apache.hadoop.hbase=WARN.
-
+
Long Client Pauses With Compression
- This is a fairly frequent question on the HBase dist-list. The scenario is that a client is typically inserting a lot of data into a
+ This is a fairly frequent question on the Apache HBase dist-list. The scenario is that a client is typically inserting a lot of data into a
relatively un-optimized HBase cluster. Compression can exacerbate the pauses, although it is not the source of the problem. See on the pattern for pre-creating regions and confirm that the table isn't starting with a single region.
- See for cluster configuration, particularly hbase.hstore.blockingStoreFiles, hbase.hregion.memstore.block.multiplier,
+ See for cluster configuration, particularly hbase.hstore.blockingStoreFiles, hbase.hregion.memstore.block.multiplier,
MAX_FILESIZE (region size), and MEMSTORE_FLUSHSIZE.
- A slightly longer explanation of why pauses can happen is as follows: Puts are sometimes blocked on the MemStores which are blocked by the flusher thread which is blocked because there are
+ A slightly longer explanation of why pauses can happen is as follows: Puts are sometimes blocked on the MemStores which are blocked by the flusher thread which is blocked because there are
too many files to compact because the compactor is given too many small files to compact and has to compact the same data repeatedly. This situation can occur even with minor compactions.
- Compounding this situation, HBase doesn't compress data in memory. Thus, the 64MB that lives in the MemStore could become a 6MB file after compression - which results in a smaller StoreFile. The upside is that
+ Compounding this situation, Apache HBase doesn't compress data in memory. Thus, the 64MB that lives in the MemStore could become a 6MB file after compression - which results in a smaller StoreFile. The upside is that
more data is packed into the same region, but performance is achieved by being able to write larger files - which is why HBase waits until the flush size before writing a new StoreFile. And smaller StoreFiles
- become targets for compaction. Without compression the files are much bigger and don't need as much compaction, however this is at the expense of I/O.
+ become targets for compaction. Without compression the files are much bigger and don't need as much compaction, however this is at the expense of I/O.
For additional information, see this thread on Long client pauses with compression.
-
-
+
+
ZooKeeper Client Connection ErrorsErrors like this...
@@ -576,11 +588,11 @@ hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/j
11/07/05 11:26:45 INFO zookeeper.ClientCnxn: Opening socket connection to
server localhost/127.0.0.1:2181
- ... are either due to ZooKeeper being down, or unreachable due to network issues.
+ ... are either due to ZooKeeper being down, or unreachable due to network issues.
The utility may help investigate ZooKeeper issues.
-
+
Client running out of memory though heap size seems to be stable (but the off-heap/direct heap keeps growing)
@@ -595,24 +607,46 @@ it a bit hefty. You want to make this setting client-side only especially if y
server-side off-heap cache since this feature depends on being able to use big direct buffers (You may have to keep
separate client-side and server-side config dirs).
-
+
Client Slowdown When Calling Admin Methods (flush, compact, etc.)
This is a client issue fixed by HBASE-5073 in 0.90.6.
-There was a ZooKeeper leak in the client and the client was getting pummeled by ZooKeeper events with each additional
-invocation of the admin API.
+There was a ZooKeeper leak in the client and the client was getting pummeled by ZooKeeper events with each additional
+invocation of the admin API.
-
+
+
+
+ Secure Client Cannot Connect ([Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)])
+
+There can be several causes that produce this symptom.
+
+
+First, check that you have a valid Kerberos ticket. One is required in order to set up communication with a secure Apache HBase cluster. Examine the ticket currently in the credential cache, if any, by running the klist command line utility. If no ticket is listed, you must obtain a ticket by running the kinit command with either a keytab specified, or by interactively entering a password for the desired principal.
+
+
+Then, consult the Java Security Guide troubleshooting section. The most common problem addressed there is resolved by setting javax.security.auth.useSubjectCredsOnly system property value to false.
+
+
+Because of a change in the format in which MIT Kerberos writes its credentials cache, there is a bug in the Oracle JDK 6 Update 26 and earlier that causes Java to be unable to read the Kerberos credentials cache created by versions of MIT Kerberos 1.8.1 or higher. If you have this problematic combination of components in your environment, to work around this problem, first log in with kinit and then immediately refresh the credential cache with kinit -R. The refresh will rewrite the credential cache without the problematic formatting.
+
+
+Finally, depending on your Kerberos configuration, you may need to install the Java Cryptography Extension, or JCE. Ensure the JCE jars are on the classpath on both server and client systems.
+
+
+You may also need to download the unlimited strength JCE policy files. Uncompress and extract the downloaded file, and install the policy jars into <java-home>/lib/security.
+
+
-
+
MapReduceYou Think You're On The Cluster, But You're Actually LocalThe following stacktrace happened using ImportTsv, but things like this
- can happen on any job with a mis-configuration.
+ can happen on any job with a mis-configuration.
WARN mapred.LocalJobRunner: job_local_0001
java.lang.IllegalArgumentException: Can't read partitions file
@@ -637,17 +671,17 @@ Caused by: java.io.FileNotFoundException: File _partition.lst does not exist.
LocalJobRunner means the job is running locally, not on the cluster.
- See
+ See
- http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/package-summary.html#classpath for more
+ http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/package-summary.html#classpath for more
information on HBase MapReduce jobs and classpaths.
-
+
-
+
NameNode
- For more information on the NameNode, see .
+ For more information on the NameNode, see .
HDFS Utilization of Tables and Regions
@@ -657,7 +691,7 @@ Caused by: java.io.FileNotFoundException: File _partition.lst does not exist.
hadoop fs -du /hbase/myTable ...returns a list of the regions under the HBase table 'myTable' and their disk utilization. For more information on HDFS shell commands, see the HDFS FileSystem Shell documentation.
-
+ Browsing HDFS for HBase ObjectsSometimes it will be necessary to explore the HBase objects that exist on HDFS. These objects could include the WALs (Write Ahead Logs), tables, regions, StoreFiles, etc.
@@ -675,30 +709,30 @@ Caused by: java.io.FileNotFoundException: File _partition.lst does not exist.
The HDFS directory structure of HBase WAL is..
/hbase
- /.logs
+ /.logs/<RegionServer> (RegionServers)
/<HLog> (WAL HLog files for the RegionServer)
- See the HDFS User Guide for other non-shell diagnostic
- utilities like fsck.
+ See the HDFS User Guide for other non-shell diagnostic
+ utilities like fsck.
Use Cases
- Two common use-cases for querying HDFS for HBase objects is research the degree of uncompaction of a table. If there are a large number of StoreFiles for each ColumnFamily it could
+ A common use-case for querying HDFS for HBase objects is researching the degree of uncompaction of a table. If there are a large number of StoreFiles for each ColumnFamily it could
indicate the need for a major compaction. Additionally, after a major compaction if the resulting StoreFile is "small" it could indicate the need for a reduction of ColumnFamilies for
the table.
-
+
-
+
NetworkNetwork Spikes
- If you are seeing periodic network spikes you might want to check the compactionQueues to see if major
+ If you are seeing periodic network spikes you might want to check the compactionQueues to see if major
compactions are happening.
See for more information on managing compactions.
@@ -709,11 +743,17 @@ Caused by: java.io.FileNotFoundException: File _partition.lst does not exist.
HBase expects the loopback IP Address to be 127.0.0.1. See the Getting Started section on .
+
+ Network Interfaces
+ Are all the network interfaces functioning correctly? Are you sure? See the Troubleshooting Case Study in .
+
+
+
-
+
RegionServer
- For more information on the RegionServers, see .
+ For more information on the RegionServers, see .
Startup Errors
@@ -721,9 +761,9 @@ Caused by: java.io.FileNotFoundException: File _partition.lst does not exist.
Master Starts, But RegionServers Do NotThe Master believes the RegionServers have the IP of 127.0.0.1 - which is localhost and resolves to the master's own localhost.
- The RegionServers are erroneously informing the Master that their IP addresses are 127.0.0.1.
+ The RegionServers are erroneously informing the Master that their IP addresses are 127.0.0.1.
- Modify /etc/hosts on the region servers, from...
+ Modify /etc/hosts on the region servers, from...
# Do not remove the following line, or various programs
# that require network functionality will fail.
@@ -739,7 +779,7 @@ Caused by: java.io.FileNotFoundException: File _partition.lst does not exist.
-
+
Compression Link Errors
@@ -753,8 +793,8 @@ java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path
.. then there is a path issue with the compression libraries. See the Configuration section on LZO compression configuration.
-
-
+
+
Runtime Errors
@@ -767,7 +807,7 @@ java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path
Adding -XX:+UseMembar to the HBase HBASE_OPTS in conf/hbase-env.sh
may fix it.
- Also, are you using ? These are discouraged because they can lock up the
+ Also, are you using ? These are discouraged because they can lock up the
RegionServers if not managed properly.
@@ -776,7 +816,7 @@ java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path
If you see log messages like this...
-2010-09-13 01:24:17,336 WARN org.apache.hadoop.hdfs.server.datanode.DataNode:
+2010-09-13 01:24:17,336 WARN org.apache.hadoop.hdfs.server.datanode.DataNode:
Disk-related IOException in BlockReceiver constructor. Cause is java.io.IOException: Too many open files
at java.io.UnixFileSystem.createFileExclusively(Native Method)
at java.io.File.createNewFile(File.java:883)
@@ -807,7 +847,7 @@ Disk-related IOException in BlockReceiver constructor. Cause is java.io.IOExcept
2009-02-24 10:01:33,516 WARN org.apache.hadoop.hbase.util.Sleeper: We slept xxx ms, ten times longer than scheduled: 10000
2009-02-24 10:01:33,516 WARN org.apache.hadoop.hbase.util.Sleeper: We slept xxx ms, ten times longer than scheduled: 15000
-2009-02-24 10:01:36,472 WARN org.apache.hadoop.hbase.regionserver.HRegionServer: unable to report to master for xxx milliseconds - retrying
+2009-02-24 10:01:36,472 WARN org.apache.hadoop.hbase.regionserver.HRegionServer: unable to report to master for xxx milliseconds - retrying
... or see full GC compactions then you may be experiencing full GC's.
@@ -838,12 +878,12 @@ java.io.IOException: Session Expired
at org.apache.zookeeper.ClientCnxn$SendThread.readConnectResult(ClientCnxn.java:589)
at org.apache.zookeeper.ClientCnxn$SendThread.doIO(ClientCnxn.java:709)
at org.apache.zookeeper.ClientCnxn$SendThread.run(ClientCnxn.java:945)
-ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expired
+ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expired
The JVM is doing a long-running garbage collection which is pausing every thread (aka "stop the world").
Since the RegionServer's local ZooKeeper client cannot send heartbeats, the session times out.
- By design, we shut down any node that isn't able to contact the ZooKeeper ensemble after getting a timeout so that it stops serving data that may already be assigned elsewhere.
+ By design, we shut down any node that isn't able to contact the ZooKeeper ensemble after getting a timeout so that it stops serving data that may already be assigned elsewhere.
@@ -852,7 +892,7 @@ ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expi
Make sure you are not CPU starving the RegionServer thread. For example, if you are running a MapReduce job using 6 CPU-intensive tasks on a machine with 4 cores, you are probably starving the RegionServer enough to create longer garbage collection pauses.
Increase the ZooKeeper session timeout
- If you wish to increase the session timeout, add the following to your hbase-site.xml to increase the timeout from the default of 60 seconds to 120 seconds.
+ If you wish to increase the session timeout, add the following to your hbase-site.xml to increase the timeout from the default of 60 seconds to 120 seconds.
<property>
<name>zookeeper.session.timeout</name>
@@ -866,8 +906,8 @@ ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expi
Be aware that setting a higher timeout means that the regions served by a failed RegionServer will take at least
- that amount of time to be transfered to another RegionServer. For a production system serving live requests, we would instead
- recommend setting it lower than 1 minute and over-provision your cluster in order the lower the memory load on each machines (hence having
+ that amount of time to be transferred to another RegionServer. For a production system serving live requests, we would instead
+ recommend setting it lower than 1 minute and over-provision your cluster in order to lower the memory load on each machine (hence having
less garbage to collect per machine).
@@ -884,7 +924,7 @@ ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expi
Regions listed by domain name, then IP
- Fix your DNS. In versions of HBase before 0.92.x, reverse DNS needs to give same answer
+ Fix your DNS. In versions of Apache HBase before 0.92.x, reverse DNS needs to give the same answer
as forward lookup. See HBASE 3431
RegionServer is not using the name given it by the master; double entry in master listing of servers for gory details.
@@ -908,35 +948,41 @@ ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expi
-
+
Shutdown Errors
-
+
-
+
Master
- For more information on the Master, see .
+ For more information on the Master, see .
Startup Errors
Master says that you need to run the hbase migrations script
Upon running that, the hbase migrations script says no files in root directory.
- HBase expects the root directory to either not exist, or to have already been initialized by hbase running a previous time. If you create a new directory for HBase using Hadoop DFS, this error will occur.
- Make sure the HBase root directory does not currently exist or has been initialized by a previous run of HBase. Sure fire solution is to just use Hadoop dfs to delete the HBase root and let HBase create and initialize the directory itself.
-
+ HBase expects the root directory to either not exist, or to have already been initialized by a previous run of HBase. If you create a new directory for HBase using Hadoop DFS, this error will occur.
+ Make sure the HBase root directory does not currently exist or has been initialized by a previous run of HBase. A surefire solution is to use Hadoop dfs to delete the HBase root and let HBase create and initialize the directory itself.
+
+
+
+ Packet len6080218 is out of range!
+ If you have many regions on your cluster and you see an error
+ like that reported above in this section's title in your logs, see
+ HBASE-4246 Cluster with too many regions cannot withstand some master failover scenarios.
-
-
+
+ Shutdown Errors
-
+
-
+
ZooKeeper
@@ -945,28 +991,28 @@ ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expi
Could not find my address: xyz in list of ZooKeeper quorum servers
A ZooKeeper server wasn't able to start and throws that error. xyz is the name of your server.
- This is a name lookup problem. HBase tries to start a ZooKeeper server on some machine but that machine isn't able to find itself in the hbase.zookeeper.quorum configuration.
-
- Use the hostname presented in the error message instead of the value you used. If you have a DNS server, you can set hbase.zookeeper.dns.interface and hbase.zookeeper.dns.nameserver in hbase-site.xml to make sure it resolves to the correct FQDN.
-
+ This is a name lookup problem. HBase tries to start a ZooKeeper server on some machine but that machine isn't able to find itself in the hbase.zookeeper.quorum configuration.
+
+ Use the hostname presented in the error message instead of the value you used. If you have a DNS server, you can set hbase.zookeeper.dns.interface and hbase.zookeeper.dns.nameserver in hbase-site.xml to make sure it resolves to the correct FQDN.
+
-
-
+
+
ZooKeeper, The Cluster Canary
ZooKeeper is the cluster's "canary in the mineshaft". It will be the first to notice issues, if any, so making sure it is happy is the shortcut to a humming cluster.
-
+
See the ZooKeeper Operating Environment Troubleshooting page. It has suggestions and tools for checking disk and networking performance; i.e. the operating environment your ZooKeeper and HBase are running in.
Additionally, the utility may help investigate ZooKeeper issues.
-
+
-
+
- Amazon EC2
+ Amazon EC2
+ ZooKeeper does not seem to work on Amazon EC2
+ HBase does not start when deployed as Amazon EC2 instances. Exceptions like the below appear in the Master and/or RegionServer logs:
@@ -978,8 +1024,8 @@ ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expi
java.net.ConnectException: Connection refused
- Security group policy is blocking the ZooKeeper port on a public address.
- Use the internal EC2 host names when configuring the ZooKeeper quorum peer list.
+ Security group policy is blocking the ZooKeeper port on a public address.
+ Use the internal EC2 host names when configuring the ZooKeeper quorum peer list.
@@ -993,15 +1039,15 @@ ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expi
See Andrew's answer here, up on the user list: Remote Java client connection into EC2 instance.
-
+
-
+
- HBase and Hadoop version issues
+ HBase and Hadoop version issues
+ NoClassDefFoundError when trying to run 0.90.x on hadoop-0.20.205.x (or hadoop-1.0.x)
- HBase 0.90.x does not ship with hadoop-0.20.205.x, etc. To make it run, you need to replace the hadoop
- jars that HBase shipped with in its lib directory with those of the Hadoop you want to
+ Apache HBase 0.90.x does not ship with hadoop-0.20.205.x, etc. To make it run, you need to replace the hadoop
+ jars that Apache HBase shipped with in its lib directory with those of the Hadoop you want to
run HBase on. If even after replacing Hadoop jars you get the below exception:
sv4r6s38: Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/commons/configuration/Configuration
@@ -1018,7 +1064,27 @@ sv4r6s38: at org.apache.hadoop.security.UserGroupInformation.ensureInitial
you need to copy the commons-configuration-X.jar found
in your Hadoop's lib directory under hbase/lib. That should fix the above complaint.
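+ For example (the paths are illustrative; substitute your own Hadoop and HBase install locations):
+ cp $HADOOP_HOME/lib/commons-configuration-*.jar $HBASE_HOME/lib/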
+
+
+
+ ...cannot communicate with client version...
+If you see something like the following in your logs
+...
+2012-09-24 10:20:52,168 FATAL org.apache.hadoop.hbase.master.HMaster: Unhandled exception. Starting shutdown.
+org.apache.hadoop.ipc.RemoteException: Server IPC version 7 cannot communicate with client version 4
+...
+...are you trying to talk to a Hadoop 2.0.x cluster from an HBase that has a Hadoop 1.0.x client?
+Use the HBase built against Hadoop 2.0 or rebuild your HBase passing the -Dhadoop.profile=2.0
+attribute to Maven (See for more).
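+For example, a rebuild along these lines (the exact goals and extra flags may vary by branch):
+mvn clean install -DskipTests -Dhadoop.profile=2.0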
+
+
-
+
+
+ Case Studies
+ For Performance and Troubleshooting Case Studies, see .
+
+
+
diff --git a/src/docbkx/upgrading.xml b/src/docbkx/upgrading.xml
index 5a1887284cf4..d1dcdd8c0e36 100644
--- a/src/docbkx/upgrading.xml
+++ b/src/docbkx/upgrading.xml
@@ -27,49 +27,29 @@
*/
-->
Upgrading
+ You cannot skip major versions when upgrading. If you are upgrading from
+ version 0.20.x to 0.92.x, you must first go from 0.20.x to 0.90.x and then go
+ from 0.90.x to 0.92.x.
Review , in particular the section on Hadoop version.
-
- Upgrading to HBase 0.90.x from 0.20.x or 0.89.x
- This version of 0.90.x HBase can be started on data written by
- HBase 0.20.x or HBase 0.89.x. There is no need of a migration step.
- HBase 0.89.x and 0.90.x does write out the name of region directories
- differently -- it names them with a md5 hash of the region name rather
- than a jenkins hash -- so this means that once started, there is no
- going back to HBase 0.20.x.
-
-
- Be sure to remove the hbase-default.xml from
- your conf
- directory on upgrade. A 0.20.x version of this file will have
- sub-optimal configurations for 0.90.x HBase. The
- hbase-default.xml file is now bundled into the
- HBase jar and read from there. If you would like to review
- the content of this file, see it in the src tree at
- src/main/resources/hbase-default.xml or
- see .
-
-
- Finally, if upgrading from 0.20.x, check your
- .META. schema in the shell. In the past we would
- recommend that users run with a 16kb
- MEMSTORE_FLUSHSIZE.
- Run hbase> scan '-ROOT-' in the shell. This will output
- the current .META. schema. Check
- MEMSTORE_FLUSHSIZE size. Is it 16kb (16384)? If so, you will
- need to change this (The 'normal'/default value is 64MB (67108864)).
- Run the script bin/set_meta_memstore_size.rb.
- This will make the necessary edit to your .META. schema.
- Failure to run this change will make for a slow cluster
-
- See HBASE-3499 Users upgrading to 0.90.0 need to have their .META. table updated with the right MEMSTORE_SIZE
-
-
- .
-
-
-
+
+ Upgrading from 0.94.x to 0.96.x
+ The Singularity
+ You will have to stop your old 0.94 cluster completely to upgrade. If you are replicating
+ between clusters, both clusters will have to go down to upgrade. Make sure it is a clean shutdown
+ so there are no WAL files lying around (TODO: Can 0.96 read 0.94 WAL files?). Make sure
+ ZooKeeper is cleared of state. All clients must be upgraded to 0.96 too.
+
+ The API has changed in a few areas; in particular how you use coprocessors (TODO: MapReduce too?)
+
+ TODO: Write about 3.4 zk ensemble and multi support
+
+
+ Upgrading from 0.92.x to 0.94.x
+ 0.92 and 0.94 are interface compatible. You can do a rolling upgrade between these versions.
+
+ Upgrading from 0.90.x to 0.92.x
+ Upgrade Guide
@@ -170,7 +150,7 @@ The block size default size has been changed in 0.92.0 from 0.2 (20 percent of h
Experimental off-heap cache
-A new cache was contributed to 0.92.0 to act as a solution between using the “on-heap” cache which is the current LRU cache the region servers have and the operating system cache which is out of our control.
+A new cache was contributed to 0.92.0 to act as a middle ground between the “on-heap” cache, which is the current LRU cache the region servers have, and the operating system cache, which is out of our control.
To enable, set “-XX:MaxDirectMemorySize” in hbase-env.sh to the value for maximum direct memory size and specify hbase.offheapcache.percentage in hbase-site.xml with the percentage that you want to dedicate to off-heap cache. This should only be set for servers and not for clients. Use at your own risk.
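+One way to do this in hbase-env.sh on the servers (the 2G value is purely illustrative; size it to your hardware, and remember to also set hbase.offheapcache.percentage in hbase-site.xml as described above):
+export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:MaxDirectMemorySize=2G"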
See this blog post for additional information on this new experimental feature: http://www.cloudera.com/blog/2012/01/caching-in-hbase-slabcache/
@@ -194,8 +174,48 @@ See this blog post for additional information on this new experimental feature:
0.92.0 stores data in a new format, . As HBase runs, it will move all your data from HFile v1 to HFile v2 format. This auto-migration will run in the background as flushes and compactions run.
HFile V2 allows HBase run with larger regions/files. In fact, we encourage that all HBasers going forward tend toward Facebook axiom #1, run with larger, fewer regions.
-If you have lots of regions now -- more than 100s per host -- you should look into setting your region size up after you move to 0.92.0 (In 0.92.0, default size is not 1G, up from 256M), and then running online merge tool (See “HBASE-1621 merge tool should work on online cluster, but disabled table”).
+If you have lots of regions now -- more than 100s per host -- you should look into setting your region size up after you move to 0.92.0 (In 0.92.0, default size is now 1G, up from 256M), and then running the online merge tool (See “HBASE-1621 merge tool should work on online cluster, but disabled table”).
+
+ Upgrading to HBase 0.90.x from 0.20.x or 0.89.x
+ This version of 0.90.x HBase can be started on data written by
+ HBase 0.20.x or HBase 0.89.x. There is no need for a migration step.
+ HBase 0.89.x and 0.90.x do write out the name of region directories
+ differently -- they name them with an md5 hash of the region name rather
+ than a jenkins hash -- so this means that once started, there is no
+ going back to HBase 0.20.x.
+
+
+ Be sure to remove the hbase-default.xml from
+ your conf
+ directory on upgrade. A 0.20.x version of this file will have
+ sub-optimal configurations for 0.90.x HBase. The
+ hbase-default.xml file is now bundled into the
+ HBase jar and read from there. If you would like to review
+ the content of this file, see it in the src tree at
+ src/main/resources/hbase-default.xml or
+ see .
+
+
+ Finally, if upgrading from 0.20.x, check your
+ .META. schema in the shell. In the past we would
+ recommend that users run with a 16kb
+ MEMSTORE_FLUSHSIZE.
+ Run hbase> scan '-ROOT-' in the shell. This will output
+ the current .META. schema. Check
+ MEMSTORE_FLUSHSIZE size. Is it 16kb (16384)? If so, you will
+ need to change this (The 'normal'/default value is 64MB (67108864)).
+ Run the script bin/set_meta_memstore_size.rb.
+ This will make the necessary edit to your .META. schema.
+ Failure to run this change will make for a slow cluster
+
+ See HBASE-3499 Users upgrading to 0.90.0 need to have their .META. table updated with the right MEMSTORE_SIZE
+
+
+ .
+
+
+
diff --git a/src/docbkx/zookeeper.xml b/src/docbkx/zookeeper.xml
new file mode 100644
index 000000000000..d6301e26fa1c
--- /dev/null
+++ b/src/docbkx/zookeeper.xml
@@ -0,0 +1,595 @@
+
+
+
+
+ ZooKeeper
+ ZooKeeper
+
+
+ A distributed Apache HBase (TM) installation depends on a running ZooKeeper cluster.
+ All participating nodes and clients need to be able to access the
+ running ZooKeeper ensemble. Apache HBase by default manages a ZooKeeper
+ "cluster" for you. It will start and stop the ZooKeeper ensemble
+ as part of the HBase start/stop process. You can also manage the
+ ZooKeeper ensemble independent of HBase and just point HBase at
+ the cluster it should use. To toggle HBase management of
+ ZooKeeper, use the HBASE_MANAGES_ZK variable in
+ conf/hbase-env.sh. This variable, which
+ defaults to true, tells HBase whether to
+ start/stop the ZooKeeper ensemble servers as part of HBase
+ start/stop.
+
+ When HBase manages the ZooKeeper ensemble, you can specify
+ ZooKeeper configuration using its native
+ zoo.cfg file, or, the easier option is to
+ just specify ZooKeeper options directly in
+ conf/hbase-site.xml. A ZooKeeper
+ configuration option can be set as a property in the HBase
+ hbase-site.xml XML configuration file by
+ prefacing the ZooKeeper option name with
+ hbase.zookeeper.property. For example, the
+ clientPort setting in ZooKeeper can be changed
+ by setting the
+ hbase.zookeeper.property.clientPort property.
+ For all default values used by HBase, including ZooKeeper
+ configuration, see . Look for the
+ hbase.zookeeper.property prefix.
+ For the full list of ZooKeeper configurations, see
+ ZooKeeper's zoo.cfg. HBase does not ship
+ with a zoo.cfg so you will need to browse
+ the conf directory in an appropriate
+ ZooKeeper download.
+
+
+ You must at least list the ensemble servers in
+ hbase-site.xml using the
+ hbase.zookeeper.quorum property. This property
+ defaults to a single ensemble member at
+ localhost which is not suitable for a fully
+ distributed HBase. (It binds to the local machine only and remote
+ clients will not be able to connect).
+ How many ZooKeepers should I run?
+
+ You can run a ZooKeeper ensemble that comprises 1 node
+ only but in production it is recommended that you run a
+ ZooKeeper ensemble of 3, 5 or 7 machines; the more members an
+ ensemble has, the more tolerant the ensemble is of host
+ failures. Also, run an odd number of machines. In ZooKeeper,
+ an even number of peers is supported, but it is normally not used
+ because an even sized ensemble requires, proportionally, more peers
+ to form a quorum than an odd sized ensemble requires. For example, an
+ ensemble with 4 peers requires 3 to form a quorum, while an ensemble with
+ 5 also requires 3 to form a quorum. Thus, an ensemble of 5 allows 2 peers to
+ fail, and thus is more fault tolerant than the ensemble of 4, which allows
+ only 1 down peer.
+
+ Give each ZooKeeper server around 1GB of RAM, and if possible, its own
+ dedicated disk (A dedicated disk is the best thing you can do
+ to ensure a performant ZooKeeper ensemble). For very heavily
+ loaded clusters, run ZooKeeper servers on separate machines
+ from RegionServers (DataNodes and TaskTrackers).
+
+
+ For example, to have HBase manage a ZooKeeper quorum on
+ nodes rs{1,2,3,4,5}.example.com, bound to
+ port 2222 (the default is 2181) ensure
+ HBASE_MANAGES_ZK is commented out or set to
+ true in conf/hbase-env.sh
+ and then edit conf/hbase-site.xml and set
+ hbase.zookeeper.property.clientPort and
+ hbase.zookeeper.quorum. You should also set
+ hbase.zookeeper.property.dataDir to other than
+ the default as the default has ZooKeeper persist data under
+ /tmp which is often cleared on system
+ restart. In the example below we have ZooKeeper persist to
+ /usr/local/zookeeper.
+ <configuration>
+ ...
+ <property>
+ <name>hbase.zookeeper.property.clientPort</name>
+ <value>2222</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The port at which the clients will connect.
+ </description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.quorum</name>
+ <value>rs1.example.com,rs2.example.com,rs3.example.com,rs4.example.com,rs5.example.com</value>
+ <description>Comma separated list of servers in the ZooKeeper Quorum.
+ For example, "host1.mydomain.com,host2.mydomain.com,host3.mydomain.com".
+ By default this is set to localhost for local and pseudo-distributed modes
+ of operation. For a fully-distributed setup, this should be set to a full
+ list of ZooKeeper quorum servers. If HBASE_MANAGES_ZK is set in hbase-env.sh
+ this is the list of servers which we will start/stop ZooKeeper on.
+ </description>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.dataDir</name>
+ <value>/usr/local/zookeeper</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The directory where the snapshot is stored.
+ </description>
+ </property>
+ ...
+ </configuration>
+
+ ZooKeeper Maintenance
+ Be sure to set up the data dir cleaner described under
+ Zookeeper Maintenance else you could
+ have 'interesting' problems a couple of months in; i.e. ZooKeeper could start
+ dropping sessions if it has to run through a directory of hundreds of thousands of
+ logs, which it is wont to do around leader reelection time -- a rare process, but one
+ that does run on occasion, whether because a machine is dropped or happens to hiccup.
+
+
+
+ Using existing ZooKeeper ensemble
+
+ To point HBase at an existing ZooKeeper cluster, one that
+ is not managed by HBase, set HBASE_MANAGES_ZK
+ in conf/hbase-env.sh to false
+
+ ...
+ # Tell HBase whether it should manage its own instance of Zookeeper or not.
+ export HBASE_MANAGES_ZK=false
+ Next set ensemble locations
+ and client port, if non-standard, in
+ hbase-site.xml, or add a suitably
+ configured zoo.cfg to HBase's
+ CLASSPATH. HBase will prefer the
+ configuration found in zoo.cfg over any
+ settings in hbase-site.xml.
+
+ When HBase manages ZooKeeper, it will start/stop the
+ ZooKeeper servers as a part of the regular start/stop scripts.
+ If you would like to run ZooKeeper yourself, independent of
+ HBase start/stop, you would do the following
+
+
+${HBASE_HOME}/bin/hbase-daemons.sh {start,stop} zookeeper
+
+
+ Note that you can use HBase in this manner to spin up a
+ ZooKeeper cluster, unrelated to HBase. Just make sure to set
+ HBASE_MANAGES_ZK to false
+ if you want it to stay up across HBase restarts so that when
+ HBase shuts down, it doesn't take ZooKeeper down with it.
+
+ For more information about running a distinct ZooKeeper
+ cluster, see the ZooKeeper Getting
+ Started Guide. Additionally, see the ZooKeeper Wiki or the
+ ZooKeeper documentation
+ for more information on ZooKeeper sizing.
+
+
+
+
+
+ SASL Authentication with ZooKeeper
+ Newer releases of Apache HBase (>= 0.92) will
+ support connecting to a ZooKeeper Quorum that supports
+ SASL authentication (which is available in Zookeeper
+ versions 3.4.0 or later).
+
+ This describes how to set up HBase to mutually
+ authenticate with a ZooKeeper Quorum. ZooKeeper/HBase
+ mutual authentication (HBASE-2418)
+ is required as part of a complete secure HBase configuration
+ (HBASE-3025).
+
+ For simplicity of explication, this section ignores
+ additional configuration required (Secure HDFS and Coprocessor
+ configuration). It's recommended to begin with an
+ HBase-managed Zookeeper configuration (as opposed to a
+ standalone Zookeeper quorum) for ease of learning.
+
+
+ Operating System Prerequisites
+
+
+ You need to have a working Kerberos KDC setup. For
+ each $HOST that will run a ZooKeeper
+ server, you should have a principal
+ zookeeper/$HOST. For each such host,
+ add a service key (using the kadmin or
+ kadmin.local tool's ktadd
+ command) for zookeeper/$HOST and copy
+ this file to $HOST, and make it
+ readable only to the user that will run zookeeper on
+ $HOST. Note the location of this file,
+ which we will use below as
+ $PATH_TO_ZOOKEEPER_KEYTAB.
+
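+ A sketch of the kadmin.local steps for one such host (the hostname, realm and
+ keytab path below are placeholders only):
+ kadmin.local -q "addprinc -randkey zookeeper/host1.example.com@EXAMPLE.COM"
+ kadmin.local -q "ktadd -k /etc/zookeeper/zookeeper.keytab zookeeper/host1.example.com@EXAMPLE.COM"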
+
+
+ Similarly, for each $HOST that will run
+ an HBase server (master or regionserver), you should
+ have a principle: hbase/$HOST. For each
+ host, add a keytab file called
+ hbase.keytab containing a service
+ key for hbase/$HOST, copy this file to
+ $HOST, and make it readable only to the
+ user that will run an HBase service on
+ $HOST. Note the location of this file,
+ which we will use below as
+ $PATH_TO_HBASE_KEYTAB.
+
+
+
+ Each user who will be an HBase client should also be
+ given a Kerberos principal. This principal should
+ usually have a password assigned to it (as opposed to,
+ as with the HBase servers, a keytab file) which only
+ this user knows. The client's principal's
+ maxrenewlife should be set so that it can
+ be renewed enough so that the user can complete their
+ HBase client processes. For example, if a user runs a
+ long-running HBase client process that takes at most 3
+ days, we might create this user's principal within
+ kadmin with: addprinc -maxrenewlife
+ 3days. The Zookeeper client and server
+ libraries manage their own ticket refreshment by
+ running threads that wake up periodically to do the
+ refreshment.
+
+
+ On each host that will run an HBase client
+ (e.g. hbase shell), add the following
+ file to the HBase home directory's conf
+ directory:
+
+
+ Client {
+ com.sun.security.auth.module.Krb5LoginModule required
+ useKeyTab=false
+ useTicketCache=true;
+ };
+
+
+ We'll refer to this JAAS configuration file as
+ $CLIENT_CONF below.
+
+
+ HBase-managed Zookeeper Configuration
+
+ On each node that will run a zookeeper, a
+ master, or a regionserver, create a JAAS
+ configuration file in the conf directory of the node's
+ HBASE_HOME directory that looks like the
+ following:
+
+
+ Server {
+ com.sun.security.auth.module.Krb5LoginModule required
+ useKeyTab=true
+ keyTab="$PATH_TO_ZOOKEEPER_KEYTAB"
+ storeKey=true
+ useTicketCache=false
+ principal="zookeeper/$HOST";
+ };
+ Client {
+ com.sun.security.auth.module.Krb5LoginModule required
+ useKeyTab=true
+ useTicketCache=false
+ keyTab="$PATH_TO_HBASE_KEYTAB"
+ principal="hbase/$HOST";
+ };
+
+
+ where the $PATH_TO_HBASE_KEYTAB and
+ $PATH_TO_ZOOKEEPER_KEYTAB files are what
+ you created above, and $HOST is the hostname for that
+ node.
+
+ The Server section will be used by
+ the Zookeeper quorum server, while the
+ Client section will be used by the HBase
+ master and regionservers. The path to this file should
+ be substituted for the text $HBASE_SERVER_CONF
+ in the hbase-env.sh
+ listing below.
+
+
+ The path to this file should be substituted for the
+ text $CLIENT_CONF in the
+ hbase-env.sh listing below.
+
+
+ Modify your hbase-env.sh to include the
+ following:
+
+
+ export HBASE_OPTS="-Djava.security.auth.login.config=$CLIENT_CONF"
+ export HBASE_MANAGES_ZK=true
+ export HBASE_ZOOKEEPER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
+ export HBASE_MASTER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
+ export HBASE_REGIONSERVER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
+
+
+ where $HBASE_SERVER_CONF and
+ $CLIENT_CONF are the full paths to the
+ JAAS configuration files created above.
+
+ Modify your hbase-site.xml on each node
+ that will run zookeeper, master or regionserver to contain:
+
+
+
+ hbase.zookeeper.quorum
+ $ZK_NODES
+
+
+ hbase.cluster.distributed
+ true
+
+
+ hbase.zookeeper.property.authProvider.1
+ org.apache.zookeeper.server.auth.SASLAuthenticationProvider
+
+
+ hbase.zookeeper.property.kerberos.removeHostFromPrincipal
+ true
+
+
+ hbase.zookeeper.property.kerberos.removeRealmFromPrincipal
+ true
+
+
+ ]]>
+
+ where $ZK_NODES is the
+ comma-separated list of hostnames of the Zookeeper
+ Quorum hosts.
+
+ Start your HBase cluster by running one or more
+ of the following set of commands on the appropriate
+ hosts:
+
+
+
+ bin/hbase zookeeper start
+ bin/hbase master start
+ bin/hbase regionserver start
+
+
+
+
+ External Zookeeper Configuration
+ Add a JAAS configuration file that looks like:
+
+
+ Client {
+ com.sun.security.auth.module.Krb5LoginModule required
+ useKeyTab=true
+ useTicketCache=false
+ keyTab="$PATH_TO_HBASE_KEYTAB"
+ principal="hbase/$HOST";
+ };
+
+
+ where the $PATH_TO_HBASE_KEYTAB is the keytab
+ created above for HBase services to run on this host, and $HOST is the
+ hostname for that node. Put this in the HBase home's
+ configuration directory. We'll refer to this file's
+ full pathname as $HBASE_SERVER_CONF below.
+
+ Modify your hbase-env.sh to include the following:
+
+
+ export HBASE_OPTS="-Djava.security.auth.login.config=$CLIENT_CONF"
+ export HBASE_MANAGES_ZK=false
+ export HBASE_MASTER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
+ export HBASE_REGIONSERVER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF"
+
+
+
+ Modify your hbase-site.xml on each node
+ that will run a master or regionserver to contain:
+
+
+
+ hbase.zookeeper.quorum
+ $ZK_NODES
+
+
+ hbase.cluster.distributed
+ true
+
+
+ ]]>
+
+
+ where $ZK_NODES is the
+ comma-separated list of hostnames of the Zookeeper
+ Quorum hosts.
+
+
+ Add a zoo.cfg for each Zookeeper Quorum host containing:
+
+ authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider
+ kerberos.removeHostFromPrincipal=true
+ kerberos.removeRealmFromPrincipal=true
+
+
+ Also on each of these hosts, create a JAAS configuration file containing:
+
+
+ Server {
+ com.sun.security.auth.module.Krb5LoginModule required
+ useKeyTab=true
+ keyTab="$PATH_TO_ZOOKEEPER_KEYTAB"
+ storeKey=true
+ useTicketCache=false
+ principal="zookeeper/$HOST";
+ };
+
+
+ where $HOST is the hostname of each
+ Quorum host. We will refer to the full pathname of
+ this file as $ZK_SERVER_CONF below.
+
+
+
+
+ Start your Zookeepers on each Zookeeper Quorum host with:
+
+
+ SERVER_JVMFLAGS="-Djava.security.auth.login.config=$ZK_SERVER_CONF" bin/zkServer start
+
+
+
+
+
+ Start your HBase cluster by running one or more of the following set of commands on the appropriate nodes:
+
+
+
+ bin/hbase master start
+ bin/hbase regionserver start
+
+
+
+
+
+
+ Zookeeper Server Authentication Log Output
+ If the configuration above is successful,
+ you should see something similar to the following in
+ your Zookeeper server logs:
+
+11/12/05 22:43:39 INFO zookeeper.Login: successfully logged in.
+11/12/05 22:43:39 INFO server.NIOServerCnxnFactory: binding to port 0.0.0.0/0.0.0.0:2181
+11/12/05 22:43:39 INFO zookeeper.Login: TGT refresh thread started.
+11/12/05 22:43:39 INFO zookeeper.Login: TGT valid starting at: Mon Dec 05 22:43:39 UTC 2011
+11/12/05 22:43:39 INFO zookeeper.Login: TGT expires: Tue Dec 06 22:43:39 UTC 2011
+11/12/05 22:43:39 INFO zookeeper.Login: TGT refresh sleeping until: Tue Dec 06 18:36:42 UTC 2011
+..
+11/12/05 22:43:59 INFO auth.SaslServerCallbackHandler:
+ Successfully authenticated client: authenticationID=hbase/ip-10-166-175-249.us-west-1.compute.internal@HADOOP.LOCALDOMAIN;
+ authorizationID=hbase/ip-10-166-175-249.us-west-1.compute.internal@HADOOP.LOCALDOMAIN.
+11/12/05 22:43:59 INFO auth.SaslServerCallbackHandler: Setting authorizedID: hbase
+11/12/05 22:43:59 INFO server.ZooKeeperServer: adding SASL authorization for authorizationID: hbase
+
+
+
+
+
+
+
+ Zookeeper Client Authentication Log Output
+ On the Zookeeper client side (HBase master or regionserver),
+ you should see something similar to the following:
+
+
+11/12/05 22:43:59 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=ip-10-166-175-249.us-west-1.compute.internal:2181 sessionTimeout=180000 watcher=master:60000
+11/12/05 22:43:59 INFO zookeeper.ClientCnxn: Opening socket connection to server /10.166.175.249:2181
+11/12/05 22:43:59 INFO zookeeper.RecoverableZooKeeper: The identifier of this process is 14851@ip-10-166-175-249
+11/12/05 22:43:59 INFO zookeeper.Login: successfully logged in.
+11/12/05 22:43:59 INFO client.ZooKeeperSaslClient: Client will use GSSAPI as SASL mechanism.
+11/12/05 22:43:59 INFO zookeeper.Login: TGT refresh thread started.
+11/12/05 22:43:59 INFO zookeeper.ClientCnxn: Socket connection established to ip-10-166-175-249.us-west-1.compute.internal/10.166.175.249:2181, initiating session
+11/12/05 22:43:59 INFO zookeeper.Login: TGT valid starting at: Mon Dec 05 22:43:59 UTC 2011
+11/12/05 22:43:59 INFO zookeeper.Login: TGT expires: Tue Dec 06 22:43:59 UTC 2011
+11/12/05 22:43:59 INFO zookeeper.Login: TGT refresh sleeping until: Tue Dec 06 18:30:37 UTC 2011
+11/12/05 22:43:59 INFO zookeeper.ClientCnxn: Session establishment complete on server ip-10-166-175-249.us-west-1.compute.internal/10.166.175.249:2181, sessionid = 0x134106594320000, negotiated timeout = 180000
+
+
+
+
+
+ Configuration from Scratch
+
+ This has been tested on the current standard Amazon
+ Linux AMI. First set up the KDC and principals as
+ described above. Next check out the code and run a sanity
+ check.
+
+
+ git clone git://git.apache.org/hbase.git
+ cd hbase
+ mvn clean test -Dtest=TestZooKeeperACL
+
+
+ Then configure HBase as described above.
+ Manually edit target/cached_classpath.txt (see below).
+
+
+ bin/hbase zookeeper &
+ bin/hbase master &
+ bin/hbase regionserver &
+
+
+
+
+
+ Future improvements
+
+ Fix target/cached_classpath.txt
+
+ You must override the standard hadoop-core jar file from the
+ target/cached_classpath.txt
+ file with the version containing the HADOOP-7070 fix. You can use the following script to do this:
+
+
+ echo `find ~/.m2 -name "*hadoop-core*7070*SNAPSHOT.jar"` ':' `cat target/cached_classpath.txt` | sed 's/ //g' > target/tmp.txt
+ mv target/tmp.txt target/cached_classpath.txt
+
+
+
+
+
+
+
+ Set JAAS configuration
+ programmatically
+
+
+ This would avoid the need for a separate Hadoop jar
+ that fixes HADOOP-7070.
+
+
+
+ Elimination of
+ kerberos.removeHostFromPrincipal and
+ kerberos.removeRealmFromPrincipal
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/examples/healthcheck/healthcheck.sh b/src/examples/healthcheck/healthcheck.sh
new file mode 100644
index 000000000000..584636054ddc
--- /dev/null
+++ b/src/examples/healthcheck/healthcheck.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # This is an example script for checking the health of a node (master or region server).
+ # The health chore script should output a message containing "ERROR" upon an undesirable
+ # outcome of the checks in the script.
+
+err=0;
+
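+# check_disks: flag any ext3 filesystem listed in /etc/fstab that is unmounted (u) or mounted read-only (ro).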
+function check_disks {
+
+for m in `awk '$3~/ext3/ {printf" %s ",$2}' /etc/fstab` ; do
+ fsdev=""
+ fsdev=`awk -v m=$m '$2==m {print $1}' /proc/mounts`;
+ if [ -z "$fsdev" ] ; then
+ msg_="$msg_ $m(u)"
+ else
+ msg_="$msg_`awk -v m=$m '$2==m { if ( $4 ~ /^ro,/ ) {printf"%s(ro)",$2 } ; }' /proc/mounts`"
+ fi
+ done
+
+ if [ -z "$msg_" ] ; then
+ echo "disks ok" ; exit 0
+ else
+ echo "$msg_" ; exit 2
+ fi
+
+}
+
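+# check_link: use snmpwalk to verify that at least one ethernet interface (ifType 6) is administratively and operationally up at 1Gb/s.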
+function check_link {
+ /usr/bin/snmpwalk -t 5 -Oe -Oq -Os -v 1 -c public localhost if | \
+ awk ' {
+ split($1,a,".") ;
+ if ( a[1] == "ifIndex" ) { ifIndex[a[2]] = $2 }
+ if ( a[1] == "ifDescr" ) { ifDescr[a[2]] = $2 }
+ if ( a[1] == "ifType" ) { ifType[a[2]] = $2 }
+ if ( a[1] == "ifSpeed" ) { ifSpeed[a[2]] = $2 }
+ if ( a[1] == "ifAdminStatus" ) { ifAdminStatus[a[2]] = $2 }
+ if ( a[1] == "ifOperStatus" ) { ifOperStatus[a[2]] = $2 }
+ }
+ END {
+ up=0;
+ for (i in ifIndex ) {
+ if ( ifType[i] == 6 && ifAdminStatus[i] == 1 && ifOperStatus[i] == 1 && ifSpeed[i] == 1000000000 ) {
+ up=i;
+ }
+ }
+ if ( up == 0 ) { print "check link" ; exit 2 }
+ else { print ifDescr[up],"ok" }
+ }'
+ exit $? ;
+}
+
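+# Run each check in turn; collect its message into either the OK or the ERROR summary that the health chore parses.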
+for check in disks link ; do
+ msg=`check_${check}` ;
+ if [ $? -eq 0 ] ; then
+ ok_msg="$ok_msg$msg,"
+ else
+ err_msg="$err_msg$msg,"
+ fi
+done
+
+if [ ! -z "$err_msg" ] ; then
+ echo -n "ERROR $err_msg "
+fi
+if [ ! -z "$ok_msg" ] ; then
+ echo -n "OK: $ok_msg"
+fi
+echo
+exit 0
diff --git a/src/examples/mapreduce/org/apache/hadoop/hbase/mapreduce/IndexBuilder.java b/src/examples/mapreduce/org/apache/hadoop/hbase/mapreduce/IndexBuilder.java
index 31c1b38dfe47..0d092d090c27 100644
--- a/src/examples/mapreduce/org/apache/hadoop/hbase/mapreduce/IndexBuilder.java
+++ b/src/examples/mapreduce/org/apache/hadoop/hbase/mapreduce/IndexBuilder.java
@@ -1,6 +1,4 @@
-/**
- * Copyright 2009 The Apache Software Foundation
- *
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
diff --git a/src/examples/mapreduce/org/apache/hadoop/hbase/mapreduce/SampleUploader.java b/src/examples/mapreduce/org/apache/hadoop/hbase/mapreduce/SampleUploader.java
index 5629ccac1946..108d65283345 100644
--- a/src/examples/mapreduce/org/apache/hadoop/hbase/mapreduce/SampleUploader.java
+++ b/src/examples/mapreduce/org/apache/hadoop/hbase/mapreduce/SampleUploader.java
@@ -1,6 +1,4 @@
-/**
- * Copyright 2009 The Apache Software Foundation
- *
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
diff --git a/src/examples/thrift/DemoClient.cpp b/src/examples/thrift/DemoClient.cpp
index 06cbc4460d17..e845669cd89d 100644
--- a/src/examples/thrift/DemoClient.cpp
+++ b/src/examples/thrift/DemoClient.cpp
@@ -1,6 +1,4 @@
/**
- * Copyright 2008 The Apache Software Foundation
- *
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
diff --git a/src/examples/thrift/DemoClient.java b/src/examples/thrift/DemoClient.java
index bb03fccd4338..036d7fd022ad 100644
--- a/src/examples/thrift/DemoClient.java
+++ b/src/examples/thrift/DemoClient.java
@@ -1,6 +1,4 @@
-/**
- * Copyright 2008 The Apache Software Foundation
- *
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
diff --git a/src/examples/thrift/DemoClient.php b/src/examples/thrift/DemoClient.php
index 669f2b6fc2fd..93d79d43f509 100644
--- a/src/examples/thrift/DemoClient.php
+++ b/src/examples/thrift/DemoClient.php
@@ -1,7 +1,5 @@
new ( column => "entry:$key", value => $valid ) ];
# This is another way to use the Mutation class
my $mutation = Hbase::Mutation->new ();
$mutation->{column} = "entry:$key";
@@ -151,7 +148,7 @@ ($)
# non-utf8 is not allowed in row names
eval {
- $mutations = [ Hbase::Mutation->new ( column => "entry:$key", value => $invalid ) ];
+ $mutations = [ Hbase::Mutation->new ( { column => "entry:$key", value => $invalid } ) ];
# this can throw a TApplicationException (HASH) error
$client->mutateRow ($demo_table, $key, $mutations);
die ("shouldn't get here!");
diff --git a/src/examples/thrift/DemoClient.py b/src/examples/thrift/DemoClient.py
index eabbbe84371d..723d7a894c29 100755
--- a/src/examples/thrift/DemoClient.py
+++ b/src/examples/thrift/DemoClient.py
@@ -1,6 +1,5 @@
#!/usr/bin/python
-'''Copyright 2008 The Apache Software Foundation
-
+'''
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
diff --git a/src/examples/thrift/DemoClient.rb b/src/examples/thrift/DemoClient.rb
index 2b7b5e7f934d..ea5acad926b2 100644
--- a/src/examples/thrift/DemoClient.rb
+++ b/src/examples/thrift/DemoClient.rb
@@ -1,7 +1,5 @@
#!/usr/bin/ruby
-# Copyright 2008 The Apache Software Foundation
-#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
diff --git a/src/examples/thrift/Makefile b/src/examples/thrift/Makefile
index 691a1e981ae9..da2f3010a302 100644
--- a/src/examples/thrift/Makefile
+++ b/src/examples/thrift/Makefile
@@ -1,5 +1,3 @@
-# Copyright 2008 The Apache Software Foundation
-#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
diff --git a/src/examples/thrift2/DemoClient.java b/src/examples/thrift2/DemoClient.java
index d5b805c83e21..828b4c01d357 100644
--- a/src/examples/thrift2/DemoClient.java
+++ b/src/examples/thrift2/DemoClient.java
@@ -1,6 +1,4 @@
-/**
- * Copyright 2011 The Apache Software Foundation
- *
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
diff --git a/src/examples/thrift2/DemoClient.py b/src/examples/thrift2/DemoClient.py
index 67abc5b5be04..3a3ebbbbbc93 100644
--- a/src/examples/thrift2/DemoClient.py
+++ b/src/examples/thrift2/DemoClient.py
@@ -1,6 +1,4 @@
"""
- Copyright 2011 The Apache Software Foundation
-
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
diff --git a/src/main/appended-resources/META-INF/LICENSE b/src/main/appended-resources/META-INF/LICENSE
new file mode 100644
index 000000000000..6ec590ec20e6
--- /dev/null
+++ b/src/main/appended-resources/META-INF/LICENSE
@@ -0,0 +1,37 @@
+----
+This project incorporates portions of the 'Protocol Buffers' project available
+under a '3-clause BSD' license.
+
+ Copyright 2008, Google Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ Code generated by the Protocol Buffer compiler is owned by the owner
+ of the input file used when generating it. This code is not
+ standalone and requires a support library to be linked with it. This
+ support library is itself covered by the above license.
diff --git a/src/main/appended-resources/META-INF/NOTICE b/src/main/appended-resources/META-INF/NOTICE
new file mode 100644
index 000000000000..d8f61099f299
--- /dev/null
+++ b/src/main/appended-resources/META-INF/NOTICE
@@ -0,0 +1,6 @@
+--
+This product incorporates portions of the 'Hadoop' project
+
+Copyright 2007-2009 The Apache Software Foundation
+
+Licensed under the Apache License v2.0
diff --git a/src/main/jamon/org/apache/hadoop/hbase/tmpl/common/TaskMonitorTmpl.jamon b/src/main/jamon/org/apache/hadoop/hbase/tmpl/common/TaskMonitorTmpl.jamon
index 4379ef59b5cd..0304154c00b6 100644
--- a/src/main/jamon/org/apache/hadoop/hbase/tmpl/common/TaskMonitorTmpl.jamon
+++ b/src/main/jamon/org/apache/hadoop/hbase/tmpl/common/TaskMonitorTmpl.jamon
@@ -1,6 +1,4 @@
<%doc>
-Copyright 2011 The Apache Software Foundation
-
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
diff --git a/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon b/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
index 0dc0691a894c..e06c5e6124f5 100644
--- a/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
+++ b/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
@@ -1,6 +1,4 @@
<%doc>
-Copyright 2011 The Apache Software Foundation
-
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
diff --git a/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon b/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon
new file mode 100644
index 000000000000..ca8308b5e3fc
--- /dev/null
+++ b/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon
@@ -0,0 +1,77 @@
+<%doc>
+
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+%doc>
+<%args>
+HMaster master;
+%args>
+<%import>
+java.util.*;
+org.apache.hadoop.hbase.util.Bytes;
+org.apache.hadoop.hbase.ServerName;
+org.apache.hadoop.hbase.ClusterStatus;
+org.apache.hadoop.hbase.master.HMaster;
+org.apache.hadoop.hbase.master.ServerManager;
+org.apache.hadoop.hbase.master.AssignmentManager;
+org.apache.hadoop.hbase.master.ActiveMasterManager;
+%import>
+<%java>
+Collection<ServerName> masters;
+
+if (master.isActiveMaster()) {
+ ClusterStatus status = master.getClusterStatus();
+ masters = status.getBackupMasters();
+} else{
+ ServerName sn = master.getActiveMasterManager().getActiveMaster() ;
+ assert sn != null : "Failed to retrieve master's ServerName!";
+
+ List<ServerName> serverNames = new ArrayList<ServerName>(1);
+ serverNames.add(sn);
+ masters = Collections.unmodifiableCollection(serverNames);
+}
+%java>
+
+<%java>
+ServerName [] serverNames = masters.toArray(new ServerName[masters.size()]);
+%java>
+<%if (!master.isActiveMaster()) %>
+
-
+<% } %>
<%
}
%>
diff --git a/src/main/resources/hbase-webapps/rest/index.html b/src/main/resources/hbase-webapps/rest/index.html
new file mode 100644
index 000000000000..e4084b7c4887
--- /dev/null
+++ b/src/main/resources/hbase-webapps/rest/index.html
@@ -0,0 +1,20 @@
+
+
diff --git a/src/main/resources/hbase-webapps/rest/rest.jsp b/src/main/resources/hbase-webapps/rest/rest.jsp
new file mode 100644
index 000000000000..ba9856c13a8f
--- /dev/null
+++ b/src/main/resources/hbase-webapps/rest/rest.jsp
@@ -0,0 +1,74 @@
+<%--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+--%>
+<%@ page contentType="text/html;charset=UTF-8"
+ import="org.apache.hadoop.conf.Configuration"
+ import="org.apache.hadoop.hbase.HBaseConfiguration"
+ import="org.apache.hadoop.hbase.util.VersionInfo"
+ import="java.util.Date"
+%>
+
+<%
+Configuration conf = (Configuration)getServletContext().getAttribute("hbase.conf");
+long startcode = conf.getLong("startcode", System.currentTimeMillis());
+String listenPort = conf.get("hbase.rest.port", "8080");
+String serverInfo = listenPort + "," + String.valueOf(startcode);
+%>
+
+
+
+
+
+
+
+Apache HBase Wiki on REST
+
+
+
diff --git a/src/main/resources/hbase-webapps/thrift/index.html b/src/main/resources/hbase-webapps/thrift/index.html
new file mode 100644
index 000000000000..9925269e8959
--- /dev/null
+++ b/src/main/resources/hbase-webapps/thrift/index.html
@@ -0,0 +1,20 @@
+
+
diff --git a/src/main/resources/hbase-webapps/thrift/thrift.jsp b/src/main/resources/hbase-webapps/thrift/thrift.jsp
new file mode 100644
index 000000000000..eee99406b7db
--- /dev/null
+++ b/src/main/resources/hbase-webapps/thrift/thrift.jsp
@@ -0,0 +1,80 @@
+<%--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+--%>
+<%@ page contentType="text/html;charset=UTF-8"
+ import="org.apache.hadoop.conf.Configuration"
+ import="org.apache.hadoop.hbase.HBaseConfiguration"
+ import="org.apache.hadoop.hbase.util.VersionInfo"
+ import="java.util.Date"
+%>
+
+<%
+Configuration conf = (Configuration)getServletContext().getAttribute("hbase.conf");
+long startcode = conf.getLong("startcode", System.currentTimeMillis());
+String listenPort = conf.get("hbase.regionserver.thrift.port", "9090");
+String serverInfo = listenPort + "," + String.valueOf(startcode);
+String implType = conf.get("hbase.regionserver.thrift.server.type", "threadpool");
+String compact = conf.get("hbase.regionserver.thrift.compact", "false");
+String framed = conf.get("hbase.regionserver.thrift.framed", "false");
+%>
+
+
+
+
+
+HBase Thrift Server
+
+
+
+
+
+
Thrift RPC engine implementation type chosen by this Thrift server
+
Compact Protocol
<%= compact %>
Thrift RPC engine uses compact protocol
+
Framed Transport
<%= framed %>
Thrift RPC engine uses framed transport
+
+
+
+Apache HBase Wiki on Thrift
+
+
+
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/XMLSchema.xsd b/src/main/resources/org/apache/hadoop/hbase/rest/XMLSchema.xsd
index de4fff1761cd..9577ce23eac5 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/XMLSchema.xsd
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/XMLSchema.xsd
@@ -98,7 +98,7 @@
-
+
@@ -111,8 +111,6 @@
-
-
@@ -166,6 +164,13 @@
+
+
+
+
+
+
+
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/CellMessage.proto b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/CellMessage.proto
index a7bfe83b79ed..8d4515417f08 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/CellMessage.proto
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/CellMessage.proto
@@ -1,4 +1,3 @@
-// Copyright 2010 The Apache Software Foundation
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/CellSetMessage.proto b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/CellSetMessage.proto
index dfdf125ed44d..4774a8d0e8f7 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/CellSetMessage.proto
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/CellSetMessage.proto
@@ -1,4 +1,3 @@
-// Copyright 2010 The Apache Software Foundation
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/ColumnSchemaMessage.proto b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/ColumnSchemaMessage.proto
index 0a9a9af68657..05e33b648329 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/ColumnSchemaMessage.proto
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/ColumnSchemaMessage.proto
@@ -1,4 +1,3 @@
-// Copyright 2010 The Apache Software Foundation
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/ScannerMessage.proto b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/ScannerMessage.proto
index 6ef3191b0a60..f7aca47131ff 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/ScannerMessage.proto
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/ScannerMessage.proto
@@ -1,4 +1,3 @@
-// Copyright 2010 The Apache Software Foundation
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/StorageClusterStatusMessage.proto b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/StorageClusterStatusMessage.proto
index 2b032f7f4ac2..a0291b4e9e8a 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/StorageClusterStatusMessage.proto
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/StorageClusterStatusMessage.proto
@@ -1,4 +1,3 @@
-// Copyright 2010 The Apache Software Foundation
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
@@ -26,6 +25,13 @@ message StorageClusterStatus {
optional int32 storefileSizeMB = 4;
optional int32 memstoreSizeMB = 5;
optional int32 storefileIndexSizeMB = 6;
+ optional int64 readRequestsCount = 7;
+ optional int64 writeRequestsCount = 8;
+ optional int32 rootIndexSizeKB = 9;
+ optional int32 totalStaticIndexSizeKB = 10;
+ optional int32 totalStaticBloomSizeKB = 11;
+ optional int64 totalCompactingKVs = 12;
+ optional int64 currentCompactedKVs = 13;
}
message Node {
required string name = 1; // name:port
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableInfoMessage.proto b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableInfoMessage.proto
index 5dd91204247a..674499c26874 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableInfoMessage.proto
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableInfoMessage.proto
@@ -1,4 +1,3 @@
-// Copyright 2010 The Apache Software Foundation
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableListMessage.proto b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableListMessage.proto
index 2ce4d25ee8aa..fbd76ea029af 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableListMessage.proto
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableListMessage.proto
@@ -1,4 +1,3 @@
-// Copyright 2010 The Apache Software Foundation
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableSchemaMessage.proto b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableSchemaMessage.proto
index d8177229d45b..47a4da589b95 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableSchemaMessage.proto
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/TableSchemaMessage.proto
@@ -1,4 +1,3 @@
-// Copyright 2010 The Apache Software Foundation
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
diff --git a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/VersionMessage.proto b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/VersionMessage.proto
index 2404a2ebd35d..cc107b3d9a61 100644
--- a/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/VersionMessage.proto
+++ b/src/main/resources/org/apache/hadoop/hbase/rest/protobuf/VersionMessage.proto
@@ -1,4 +1,3 @@
-// Copyright 2010 The Apache Software Foundation
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
diff --git a/src/main/resources/org/apache/hadoop/hbase/thrift/Hbase.thrift b/src/main/resources/org/apache/hadoop/hbase/thrift/Hbase.thrift
index f698a6c8f67f..890339d5d9b9 100644
--- a/src/main/resources/org/apache/hadoop/hbase/thrift/Hbase.thrift
+++ b/src/main/resources/org/apache/hadoop/hbase/thrift/Hbase.thrift
@@ -110,13 +110,32 @@ struct BatchMutation {
2:list<Mutation> mutations
}
+/**
+ * For increments that are not incrementColumnValue
+ * equivalents.
+ */
+struct TIncrement {
+ 1:Text table,
+ 2:Text row,
+ 3:Text column,
+ 4:i64 ammount
+}
+
+/**
+ * Holds column name and the cell.
+ */
+struct TColumn {
+ 1:Text columnName,
+ 2:TCell cell
+ }
/**
* Holds row name and then a map of columns to cells.
*/
struct TRowResult {
1:Text row,
- 2:map<Text, TCell> columns
+ 2:optional map<Text, TCell> columns,
+ 3:optional list<TColumn> sortedColumns
}
/**
@@ -128,7 +147,8 @@ struct TScan {
3:optional i64 timestamp,
4:optional list<Text> columns,
5:optional i32 caching,
- 6:optional Text filterString
+ 6:optional Text filterString,
+ 7:optional bool sortColumns
}
//
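The TRowResult/TScan changes above let a Thrift client ask for cells in sorted column order and read them back through the new sortedColumns list. The sketch below is illustrative only: it assumes the Java classes the Thrift compiler generates for this IDL (org.apache.hadoop.hbase.thrift.generated.*), a Thrift gateway on a placeholder localhost:9090, and a scannerOpenWithScan signature that takes an attributes map, which may vary by release.

    import java.nio.ByteBuffer;
    import java.util.HashMap;
    import java.util.List;

    import org.apache.hadoop.hbase.thrift.generated.Hbase;
    import org.apache.hadoop.hbase.thrift.generated.TColumn;
    import org.apache.hadoop.hbase.thrift.generated.TRowResult;
    import org.apache.hadoop.hbase.thrift.generated.TScan;
    import org.apache.thrift.protocol.TBinaryProtocol;
    import org.apache.thrift.transport.TSocket;
    import org.apache.thrift.transport.TTransport;

    public class SortedScanSketch {
      public static void main(String[] args) throws Exception {
        TTransport transport = new TSocket("localhost", 9090);  // placeholder gateway
        transport.open();
        Hbase.Client client = new Hbase.Client(new TBinaryProtocol(transport));

        TScan scan = new TScan();
        scan.setStartRow(ByteBuffer.wrap("row-0000".getBytes("UTF-8")));
        scan.setSortColumns(true);  // new optional field 7 from this patch

        int scannerId = client.scannerOpenWithScan(
            ByteBuffer.wrap("t1".getBytes("UTF-8")), scan,
            new HashMap<ByteBuffer, ByteBuffer>());  // attributes map (assumed parameter)

        for (TRowResult row : client.scannerGetList(scannerId, 100)) {
          // sortedColumns is the new optional field 3; expect it to be populated
          // when sortColumns was requested on the TScan.
          List<TColumn> cols = row.getSortedColumns();
          System.out.println("columns in row: " + (cols == null ? 0 : cols.size()));
        }
        client.scannerClose(scannerId);
        transport.close();
      }
    }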
@@ -627,6 +647,23 @@ service Hbase {
3:map<Text, Text> attributes
) throws (1:IOError io)
+ /**
+ * Increment a cell by the given amount.
+ * Increments can be applied asynchronously if hbase.regionserver.thrift.coalesceIncrement is set to true.
+ * The default is false. Set it to true if you need the extra performance and can accept some
+ * data loss if a Thrift server dies with increments still in the queue.
+ */
+ void increment(
+ /** The single increment to apply */
+ 1:TIncrement increment
+ ) throws (1:IOError io)
+
+
+ void incrementRows(
+ /** The list of increments */
1:list<TIncrement> increments
+ ) throws (1:IOError io)
+
/**
* Completely delete the row's cells marked with a timestamp
* equal-to or older than the passed timestamp.
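The two new methods above expose single and batched increments over Thrift. A minimal sketch, assuming the generated org.apache.hadoop.hbase.thrift.generated client and a gateway on a placeholder localhost:9090; whether increments are coalesced asynchronously is a server-side setting (hbase.regionserver.thrift.coalesceIncrement, default false), not something the client controls per call.

    import java.nio.ByteBuffer;
    import java.util.Arrays;

    import org.apache.hadoop.hbase.thrift.generated.Hbase;
    import org.apache.hadoop.hbase.thrift.generated.TIncrement;
    import org.apache.thrift.protocol.TBinaryProtocol;
    import org.apache.thrift.transport.TSocket;
    import org.apache.thrift.transport.TTransport;

    public class IncrementSketch {
      public static void main(String[] args) throws Exception {
        TTransport transport = new TSocket("localhost", 9090);  // placeholder gateway
        transport.open();
        Hbase.Client client = new Hbase.Client(new TBinaryProtocol(transport));

        // Field names follow the IDL above; note the amount field is spelled "ammount".
        TIncrement inc = new TIncrement();
        inc.setTable(ByteBuffer.wrap("counters".getBytes("UTF-8")));
        inc.setRow(ByteBuffer.wrap("row1".getBytes("UTF-8")));
        inc.setColumn(ByteBuffer.wrap("f:hits".getBytes("UTF-8")));
        inc.setAmmount(1L);

        client.increment(inc);                          // single increment
        client.incrementRows(Arrays.asList(inc, inc));  // batched increments

        transport.close();
      }
    }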
diff --git a/src/main/resources/org/apache/hadoop/hbase/thrift2/hbase.thrift b/src/main/resources/org/apache/hadoop/hbase/thrift2/hbase.thrift
index 5bb0f51cbd3c..502cea925aa5 100644
--- a/src/main/resources/org/apache/hadoop/hbase/thrift2/hbase.thrift
+++ b/src/main/resources/org/apache/hadoop/hbase/thrift2/hbase.thrift
@@ -77,6 +77,21 @@ enum TDeleteType {
DELETE_COLUMNS = 1
}
+/**
+ * Specify Durability:
+ * - SKIP_WAL means do not write the Mutation to the WAL.
+ * - ASYNC_WAL means write the Mutation to the WAL asynchronously.
+ * - SYNC_WAL means write the Mutation to the WAL synchronously.
+ * - FSYNC_WAL means write the Mutation to the WAL synchronously and force the entries to disk.
+ */
+
+enum TDurability {
+ SKIP_WAL = 1,
+ ASYNC_WAL = 2,
+ SYNC_WAL = 3,
+ FSYNC_WAL = 4
+}
+
/**
* Used to perform Get operations on a single row.
*
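The TDurability enum above lets thrift2 callers state how a mutation should hit the WAL. The sketch below is only one plausible translation onto 0.94, where a client-side Put exposes just a writeToWAL flag; it is an illustration of the enum's intent, not the mapping this patch actually ships.

    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.thrift2.generated.TDurability;

    public class DurabilitySketch {
      // Hypothetical helper: fold the four durability levels down to the
      // boolean WAL flag that 0.94's Put API understands.
      static void apply(TDurability durability, Put put) {
        if (durability == null) {
          return;  // keep the Put's default behaviour (write to the WAL)
        }
        switch (durability) {
          case SKIP_WAL:
            put.setWriteToWAL(false);  // do not write the Mutation to the WAL
            break;
          case ASYNC_WAL:
          case SYNC_WAL:
          case FSYNC_WAL:
            put.setWriteToWAL(true);   // 0.94 cannot distinguish async/sync/fsync here
            break;
        }
      }
    }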
@@ -89,8 +104,6 @@ enum TDeleteType {
*
* If you specify a time range and a timestamp the range is ignored.
* Timestamps on TColumns are ignored.
- *
- * TODO: Filter, Locks
*/
struct TGet {
1: required binary row,
@@ -100,6 +113,8 @@ struct TGet {
4: optional TTimeRange timeRange,
5: optional i32 maxVersions,
+ 6: optional binary filterString,
+ 7: optional map<binary, binary> attributes
}
/**
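The TGet additions above bring server-side filtering and per-operation attributes to thrift2 Gets. A rough sketch of a caller using them, assuming the generated org.apache.hadoop.hbase.thrift2.generated classes and a gateway on a placeholder localhost:9090; the filter text follows the standard HBase filter language.

    import java.nio.ByteBuffer;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.hbase.thrift2.generated.TGet;
    import org.apache.hadoop.hbase.thrift2.generated.THBaseService;
    import org.apache.hadoop.hbase.thrift2.generated.TResult;
    import org.apache.thrift.protocol.TBinaryProtocol;
    import org.apache.thrift.transport.TSocket;
    import org.apache.thrift.transport.TTransport;

    public class FilteredGetSketch {
      public static void main(String[] args) throws Exception {
        TTransport transport = new TSocket("localhost", 9090);  // placeholder gateway
        transport.open();
        THBaseService.Client client = new THBaseService.Client(new TBinaryProtocol(transport));

        TGet get = new TGet(ByteBuffer.wrap("row1".getBytes("UTF-8")));
        // New field 6: server-side filter expressed in the HBase filter language.
        get.setFilterString(ByteBuffer.wrap("ValueFilter(=, 'binary:42')".getBytes("UTF-8")));
        // New field 7: free-form operation attributes.
        Map<ByteBuffer, ByteBuffer> attrs = new HashMap<ByteBuffer, ByteBuffer>();
        attrs.put(ByteBuffer.wrap("trace.id".getBytes("UTF-8")),
                  ByteBuffer.wrap("demo".getBytes("UTF-8")));
        get.setAttributes(attrs);

        TResult result = client.get(ByteBuffer.wrap("t1".getBytes("UTF-8")), get);
        System.out.println("cells returned: " + result.getColumnValues().size());
        transport.close();
      }
    }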
@@ -117,7 +132,9 @@ struct TPut {
1: required binary row,
2: required list<TColumnValue> columnValues
3: optional i64 timestamp,
- 4: optional bool writeToWal = 1
+ 4: optional bool writeToWal,
+ 5: optional map