elastic · vladimirdolzhenko · Sep 19, 2018 · Jul 23, 2018 · Jul 31, 2018 · Jul 23, 2018
diff --git a/distribution/src/bin/elasticsearch-shard b/distribution/src/bin/elasticsearch-shard
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+ES_MAIN_CLASS=org.elasticsearch.index.shard.ShardToolCli \
+  "`dirname "$0"`"/elasticsearch-cli \
+  "$@"
diff --git a/...bution/src/bin/elasticsearch-translog.bat → distribution/src/bin/elasticsearch-shard.bat b/...bution/src/bin/elasticsearch-translog.bat → distribution/src/bin/elasticsearch-shard.bat
@@ -3,7 +3,7 @@
 setlocal enabledelayedexpansion
 setlocal enableextensions
 
-set ES_MAIN_CLASS=org.elasticsearch.index.translog.TranslogToolCli
+set ES_MAIN_CLASS=org.elasticsearch.index.shard.ShardToolCli
 call "%~dp0elasticsearch-cli.bat" ^
   %%* ^
   || exit /b 1

diff --git a/distribution/src/bin/elasticsearch-translog b/distribution/src/bin/elasticsearch-translog
diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc
@@ -63,12 +63,6 @@ corruption is detected, it will prevent the shard from being opened. Accepts:
     Check for both physical and logical corruption. This is much more
     expensive in terms of CPU and memory usage.
 
-`fix`::
-
-    Check for both physical and logical corruption.  Segments that were reported
-    as corrupted will be automatically removed. This option *may result in data loss*.
-    Use with extreme caution!
-
 WARNING: Expert only. Checking shards may take a lot of time on large indices.
 --
 
@@ -279,6 +273,10 @@ Other index settings are available in index modules:
 
     Control over the transaction log and background flush operations.
 
+<<index-modules-command-line-tools,Command-line tools>>::
+
+    Command-line tools to use if a shard is corrupted.
+
 --
 
 include::index-modules/analysis.asciidoc[]
@@ -297,4 +295,6 @@ include::index-modules/store.asciidoc[]
 
 include::index-modules/translog.asciidoc[]
 
+include::index-modules/command-line-tools.asciidoc[]
+
 include::index-modules/index-sorting.asciidoc[]
diff --git a/docs/reference/index-modules/command-line-tools.asciidoc b/docs/reference/index-modules/command-line-tools.asciidoc
@@ -0,0 +1,213 @@
+[[index-modules-command-line-tools]]
+
+== Command-line tools
+
+In some cases (a bad drive, user error) the translog or Lucene index on a shard copy
+can become corrupted. When this corruption is detected by Elasticsearch due to mismatching
+checksums, Elasticsearch will fail that shard copy and refuse to use that copy
+of the data.
+
+*Note*: If there are other copies of the shard available then
+Elasticsearch will automatically recover from one of them using the normal
+shard allocation and recovery mechanism.  In particular, if the corrupt shard
+copy was the primary when the corruption was detected then one of its replicas
+will be promoted in its place.
+
+You can also use <<modules-snapshots,snapshot and restore>> to restore the index.
+
+Use this tool only as a last resort, when there is no copy of the data
+from which Elasticsearch can recover successfully.
+
+We provide a command-line tool for this - `elasticsearch-shard`.
+
+The cost of applying this tool is losing the corrupted data. The lost data can be
+of any age: it may be recent or old.
+
+[WARNING]
+The `elasticsearch-shard` tool should *not* be run while Elasticsearch is
+running. If you attempt to run this tool while Elasticsearch is running, you
+will permanently lose the documents that were contained only in the translog!
+
+[WARNING]
+After the corrupted part is dropped, the allocation id of the shard changes.
+`elasticsearch-shard` prints the details of the command that has to be run after the node
+is restarted to apply the change.
+You should run the following command to apply the allocation id change:
+[source,txt]
+--------------------------------------------------
+$ curl -XPOST 'http://localhost:9200/_cluster/reroute' -d '
+{
+  "commands" : [
+    {
+      "allocate_stale_primary" : {
+        "index" : "twitter",
+        "shard" : 0,
+        "node" : "pAfJBgAAQACIfI2M_____w",
+        "accept_data_loss" : true
+      }
+    }
+  ]
+}'
+--------------------------------------------------
+
+=== What to do if the translog becomes corrupted?
+
+In order to drop a corrupted translog, use the `truncate-translog` subcommand:
+
+* specify the index name with `--index` and the shard id with `--shard-id`
+* or specify the full path to the corrupted translog with the `-d` option
+
+[source,txt]
+--------------------------------------------------
+$ bin/elasticsearch-shard truncate-translog -d /var/lib/elasticsearchdata/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/
+Checking existing translog files
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!   WARNING: Elasticsearch MUST be stopped before running this tool   !
+!                                                                     !
+!   WARNING:    Documents inside of translog files will be lost       !
+!                                                                     !
+!   WARNING:          The following files will be DELETED!            !
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+--> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-41.ckp
+--> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-6.ckp
+--> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-37.ckp
+--> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-24.ckp
+--> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-11.ckp
+
+Continue and DELETE files? [y/N] y
+Reading translog UUID information from Lucene commit from shard at [data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/index]
+Translog Generation: 3
+Translog UUID      : AxqC4rocTC6e0fwsljAh-Q
+Removing existing translog files
+Creating new empty checkpoint at [data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog.ckp]
+Creating new empty translog at [data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-3.tlog]
+
+Marking index with the new history uuid : TAUddBstTciV9wAiA6sKFA
+Changing allocation id ceU7CskxT4yRw4M-GLO1mg to nFa2DcCsSlady4LlnJeaEQ
+You should run follow command to apply allocation id changes:
+
+$ curl -XPOST 'http://localhost:9200/_cluster/reroute' -d '
+{
+  "commands" : [
+    {
+      "allocate_stale_primary" : {
+        "index" : "twitter",
+        "shard" : 0,
+        "node" : "pAfJBgAAQACIfI2M_____w",
+        "accept_data_loss" : true
+      }
+    }
+  ]
+}'
+Done.
+--------------------------------------------------
+
+=== What to do if the Lucene index becomes corrupted?
+
+In similar cases the Lucene index on a shard copy can become corrupted.
+As with a corrupted translog, when index corruption is detected by Elasticsearch due
+to mismatching checksums, Elasticsearch will fail that shard copy and refuse to use that copy of the data.
+If there are other copies of the shard available then Elasticsearch will automatically recover from one of
+them using the normal shard allocation and recovery mechanism.
+
+In order to remove corrupted segments, use the `remove-corrupted-segments` subcommand.
+It writes a new segments file that drops the references to problematic (corrupted) Lucene segments; use it only if there is
+no copy of the data from which Elasticsearch can recover successfully.
+
+It is highly recommended to make a complete backup of your index before using this tool to remove corrupted documents
+from your index!
+
+* specify the index name with `--index` and the shard id with `--shard-id`
+* or specify the full path to the corrupted index with the `-d` option
+
+You can get an overview of the corruption with the `--dry-run` option:
+
+[source,txt]
+--------------------------------------------------
+$ bin/elasticsearch-shard remove-corrupted-segments --dry-run --index twitter --shard-id 0
+
+Opening index @ /var/lib/elasticsearchdata/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/index/
+
+WARNING: Corrupted segments found - 568 documents are damaged
+
+--------------------------------------------------
+
+Running `remove-corrupted-segments` without `--dry-run` requires interactive confirmation to drop damaged segments:
+
+[source,txt]
+--------------------------------------------------
+
+$ bin/elasticsearch-shard remove-corrupted-segments --index twitter --shard-id 0
+
+Opening index @ /var/lib/elasticsearchdata/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/index/
+
+Segments file=segments_8 numSegments=6 version=7.4.0 id=efcaej17mrbjqf9jf5js52e94 userData={history_uuid=3Mu-8x3zTMm8TIZxwTkTZw, local_checkpoint=1896, max_seq_no=1896, max_unsafe_auto_id_timestamp=-1, translog_generation=7, translog_uuid=2n8vuupLQWSh5LDQzRe2fQ}
+  1 of 2: name=_0 maxDoc=1
+    version=7.4.0
+    id=efcaej17mrbjqf9jf5js52e8k
+    codec=Lucene70
+    compound=true
+    numFiles=3
+    size (MB)=0.004
+    diagnostics = {java.runtime.version=10.0.2+13, java.vendor=Oracle Corporation, java.version=10.0.2, java.vm.version=10.0.2+13, lucene.version=7.4.0, os=Mac OS X, os.arch=x86_64, os.version=10.13.6, source=flush, timestamp=1532081797245}
+    no deletions
+    test: open reader.........OK [took 0.001 sec]
+    test: check integrity.....OK [took 0.000 sec]
+    test: check live docs.....OK [took 0.000 sec]
+    test: field infos.........OK [9 fields] [took 0.000 sec]
+    test: field norms.........OK [2 fields] [took 0.000 sec]
+    test: terms, freq, prox...OK [5 terms; 5 terms/docs pairs; 2 tokens] [took 0.000 sec]
+    test: stored fields.......OK [2 total field count; avg 2.0 fields per doc] [took 0.000 sec]
+    test: term vectors........OK [0 total term vector count; avg 0.0 term/freq vector fields per doc] [took 0.000 sec]
+    test: docvalues...........OK [5 docvalues fields; 0 BINARY; 3 NUMERIC; 0 SORTED; 0 SORTED_NUMERIC; 2 SORTED_SET] [took 0.000 sec]
+    test: points..............OK [1 fields, 1 points] [took 0.000 sec]
+
+  2 of 2: name=_1 maxDoc=568
+    version=7.4.0
+    id=efcaej17mrbjqf9jf5js52e8q
+    codec=Lucene70
+    compound=true
+    numFiles=3
+    size (MB)=1.148
+    diagnostics = {java.runtime.version=10.0.2+13, java.vendor=Oracle Corporation, java.version=10.0.2, java.vm.version=10.0.2+13, lucene.version=7.4.0, os=Mac OS X, os.arch=x86_64, os.version=10.13.6, source=flush, timestamp=1532081798123}
+    no deletions
+    test: open reader.........FAILED
+    WARNING: exorciseIndex() would remove reference to this segment;
+
+WARNING: 1 broken segments (containing 568 documents) detected
+Took 0.049 sec total.
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!   WARNING:                568 documents will be lost.               !
+!                                                                     !
+!   WARNING:            YOU WILL LOSE DATA.                           !
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+Continue and remove 568 docs from the index ? [y/N]
+
+Writing...
+OK
+Wrote new segments file "segments_8"
+
+Marking index with the new history uuid : TAUddBstTciV9wAiA6sKFA
+Changing allocation id ceU7CskxT4yRw4M-GLO1mg to nFa2DcCsSlady4LlnJeaEQ
+You should run follow command to apply allocation id changes:
+
+$ curl -XPOST 'http://localhost:9200/_cluster/reroute' -d '
+{
+  "commands" : [
+    {
+      "allocate_stale_primary" : {
+        "index" : "twitter",
+        "shard" : 0,
+        "node" : "pAfJBgAAQACIfI2M_____w",
+        "accept_data_loss" : true
+      }
+    }
+  ]
+}'
+Deleted corrupt marker corrupted_cJv5hCxeTE2p3AucpgCJyg
+
+--------------------------------------------------
+
+You can also use the `-h` option to get a list of all options and parameters
+that the `elasticsearch-shard` tool supports.
diff --git a/docs/reference/index-modules/translog.asciidoc b/docs/reference/index-modules/translog.asciidoc
@@ -88,59 +88,4 @@ file based sync. Defaults to `512mb`
 The maximum duration for which translog files will be kept. Defaults to `12h`.
 
 
-[float]
-[[corrupt-translog-truncation]]
-=== What to do if the translog becomes corrupted?
-
-In some cases (a bad drive, user error) the translog on a shard copy can become
-corrupted. When this corruption is detected by Elasticsearch due to mismatching
-checksums, Elasticsearch will fail that shard copy and refuse to use that copy
-of the data.  If there are other copies of the shard available then
-Elasticsearch will automatically recover from one of them using the normal
-shard allocation and recovery mechanism.  In particular, if the corrupt shard
-copy was the primary when the corruption was detected then one of its replicas
-will be promoted in its place.
-
-If there is no copy of the data from which Elasticsearch can recover
-successfully, a user may want to recover the data that is part of the shard at
-the cost of losing the data that is currently contained in the translog. We
-provide a command-line tool for this, `elasticsearch-translog`.
-
-[WARNING]
-The `elasticsearch-translog` tool should *not* be run while Elasticsearch is
-running. If you attempt to run this tool while Elasticsearch is running, you 
-will permanently lose the documents that were contained only in the translog!
-
-In order to run the `elasticsearch-translog` tool, specify the `truncate`
-subcommand as well as the directory for the corrupted translog with the `-d`
-option:
-
-[source,txt]
---------------------------------------------------
-$ bin/elasticsearch-translog truncate -d /var/lib/elasticsearchdata/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/
-Checking existing translog files
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!   WARNING: Elasticsearch MUST be stopped before running this tool   !
-!                                                                     !
-!   WARNING:    Documents inside of translog files will be lost       !
-!                                                                     !
-!   WARNING:          The following files will be DELETED!            !
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
---> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-41.ckp
---> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-6.ckp
---> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-37.ckp
---> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-24.ckp
---> data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-11.ckp
-
-Continue and DELETE files? [y/N] y
-Reading translog UUID information from Lucene commit from shard at [data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/index]
-Translog Generation: 3
-Translog UUID      : AxqC4rocTC6e0fwsljAh-Q
-Removing existing translog files
-Creating new empty checkpoint at [data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog.ckp]
-Creating new empty translog at [data/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/translog-3.tlog]
-Done.
---------------------------------------------------
-
-You can also use the `-h` option to get a list of all options and parameters
-that the `elasticsearch-translog` tool supports.
+[float]
diff --git a/docs/reference/migration/migrate_7_0/indices.asciidoc b/docs/reference/migration/migrate_7_0/indices.asciidoc
@@ -78,3 +78,12 @@ The parent circuit breaker defines a new setting `indices.breaker.total.use_real
 heap memory instead of only considering the reserved memory by child circuit breakers. When this
 setting is `true`, the default parent breaker limit also changes from 70% to 95% of the JVM heap size.
 The previous behavior can be restored by setting `indices.breaker.total.use_real_memory` to `false`.
+
+==== `fix` value for `index.shard.check_on_startup` is removed
+
+The `elasticsearch-shard remove-corrupted-segments` tool has to be used instead of
+the `index.shard.check_on_startup: fix` setting.
+
+==== `elasticsearch-translog` tool merged into `elasticsearch-shard`
+
+Instead of the `elasticsearch-translog` tool, you should use `elasticsearch-shard truncate-translog`.
diff --git a/libs/cli/src/main/java/org/elasticsearch/cli/Terminal.java b/libs/cli/src/main/java/org/elasticsearch/cli/Terminal.java
@@ -85,12 +85,17 @@ public final void println(Verbosity verbosity, String msg) {
 
     /** Prints message to the terminal at {@code verbosity} level, without a newline. */
     public final void print(Verbosity verbosity, String msg) {
-        if (this.verbosity.ordinal() >= verbosity.ordinal()) {
+        if (isPrintable(verbosity)) {
             getWriter().print(msg);
             getWriter().flush();
         }
     }
 
+    /** Checks whether a message at the given {@code verbosity} level would be printed by this terminal. */
+    public final boolean isPrintable(Verbosity verbosity) {
+        return this.verbosity.ordinal() >= verbosity.ordinal();
+    }
+
     /**
      * Prompt for a yes or no answer from the user. This method will loop until 'y' or 'n'
      * (or the default empty value) is entered.

diff --git a/qa/vagrant/src/main/java/org/elasticsearch/packaging/test/ArchiveTestCase.java b/qa/vagrant/src/main/java/org/elasticsearch/packaging/test/ArchiveTestCase.java
@@ -325,4 +325,21 @@ public void test90SecurityCliPackaging() {
         }
     }
 
+    public void test100RepairIndexCliPackaging() {
+        assumeThat(installation, is(notNullValue())); // precondition: installation must be non-null
+
+        final Installation.Executables bin = installation.executables();
+        final Shell sh = new Shell();
+
+        Platforms.PlatformAction action = () -> {
+            final Result result = sh.run(bin.elasticsearchShard + " help"); // invoke the shard tool's help command
+            assertThat(result.stdout, containsString("A CLI tool to manage shard"));
+        };
+
+        if (distribution().equals(Distribution.DEFAULT_TAR) || distribution().equals(Distribution.DEFAULT_ZIP)) {
+            Platforms.onLinux(action); // same check is registered for both platforms
+            Platforms.onWindows(action);
+        }
+    }
+
 }
diff --git a/qa/vagrant/src/main/java/org/elasticsearch/packaging/util/Archives.java b/qa/vagrant/src/main/java/org/elasticsearch/packaging/util/Archives.java
@@ -186,7 +186,7 @@ private static void verifyOssInstallation(Installation es, Distribution distribu
             "elasticsearch-env",
             "elasticsearch-keystore",
             "elasticsearch-plugin",
-            "elasticsearch-translog"
+            "elasticsearch-shard"
         ).forEach(executable -> {
 
             assertThat(es.bin(executable), file(File, owner, owner, p755));