diff --git a/csi.proto b/csi.proto index 81627dd6..479c9f2a 100644 --- a/csi.proto +++ b/csi.proto @@ -1328,7 +1328,10 @@ message VolumeHealth { option (alpha_message) = true; enum Condition { - // Volume health condition is unknown; treat it as healthy. + // Volume health condition is unknown. If this is the initial + // report of volume health, treat it as healthy. If there was + // already a condition reported previously, treat unknown as + // unchanged from the previous condition. UNKNOWN = 0; // Volume is accessible and no problems are detected. HEALTHY = 1; @@ -1337,13 +1340,15 @@ message VolumeHealth { // maintenance. TEMPORARY_INACCESSIBLE = 2; // Volume is accessible currently, but is degraded. Data loss - // is unlikely as the storage system is fixing the problem - // automatically. + // is unlikely as the storage system is rebuilding to fix the + // problem automatically. For example, a replica is lost on + // the storage system but a spare is configured, and the storage + // system is resyncing/reconstructing. TEMPORARY_DEGRADED = 3; // Volume is accessible currently, but a problem is detected that // could lead to eventual data loss. Examples: recoverable hardware - // failure, cooling failure, equipment significantly past end of - // life. + // failure (e.g. the disk needs to be manually replaced by a human), + // cooling failure, equipment significantly past end of life. FAILURE_LIKELY = 4; // Volume is not accessible currently, and there is reason to // believe the outage may be permanent. Examples: unrecoverable diff --git a/lib/go/csi/csi.pb.go b/lib/go/csi/csi.pb.go index 3e34f4cb..1a8f0831 100644 --- a/lib/go/csi/csi.pb.go +++ b/lib/go/csi/csi.pb.go @@ -288,7 +288,10 @@ func (VolumeUsage_Unit) EnumDescriptor() ([]byte, []int) { type VolumeHealth_Condition int32 const ( - // Volume health condition is unknown; treat it as healthy. + // Volume health condition is unknown. If this is the initial + // report of volume health, treat it as healthy. 
If there was + // already a condition reported previously, treat unknown as + // unchanged from the previous condition. VolumeHealth_UNKNOWN VolumeHealth_Condition = 0 // Volume is accessible and no problems are detected. VolumeHealth_HEALTHY VolumeHealth_Condition = 1 @@ -297,13 +300,15 @@ const ( // maintenance. VolumeHealth_TEMPORARY_INACCESSIBLE VolumeHealth_Condition = 2 // Volume is accessible currently, but is degraded. Data loss - // is unlikely as the storage system is fixing the problem - // automatically. + // is unlikely as the storage system is rebuilding to fix the + // problem automatically. For example, a replica is lost on + // the storage system but a spare is configured, and the storage + // system is resyncing/reconstructing. VolumeHealth_TEMPORARY_DEGRADED VolumeHealth_Condition = 3 // Volume is accessible currently, but a problem is detected that // could lead to eventual data loss. Examples: recoverable hardware - // failure, cooling failure, equipment significantly past end of - // life. + // failure (e.g. the disk needs to be manually replaced by a human), + // cooling failure, equipment significantly past end of life. VolumeHealth_FAILURE_LIKELY VolumeHealth_Condition = 4 // Volume is not accessible currently, and there is reason to // believe the outage may be permanent. Examples: unrecoverable diff --git a/spec.md b/spec.md index 940458b7..4af8c032 100644 --- a/spec.md +++ b/spec.md @@ -2336,7 +2336,10 @@ message VolumeHealth { option (alpha_message) = true; enum Condition { - // Volume health condition is unknown; treat it as healthy. + // Volume health condition is unknown. If this is the initial + // report of volume health, treat it as healthy. If there was + // already a condition reported previously, treat unknown as + // unchanged from the previous condition. UNKNOWN = 0; // Volume is accessible and no problems are detected. HEALTHY = 1; @@ -2345,13 +2348,15 @@ message VolumeHealth { // maintenance. 
TEMPORARY_INACCESSIBLE = 2; // Volume is accessible currently, but is degraded. Data loss - // is unlikely as the storage system is fixing the problem - // automatically. + // is unlikely as the storage system is rebuilding to fix the + // problem automatically. For example, a replica is lost on + // the storage system but a spare is configured, and the storage + // system is resyncing/reconstructing. TEMPORARY_DEGRADED = 3; // Volume is accessible currently, but a problem is detected that // could lead to eventual data loss. Examples: recoverable hardware - // failure, cooling failure, equipment significantly past end of - // life. + // failure (e.g. the disk needs to be manually replaced by a human), + // cooling failure, equipment significantly past end of life. FAILURE_LIKELY = 4; // Volume is not accessible currently, and there is reason to // believe the outage may be permanent. Examples: unrecoverable