Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

windowed post generation now returns faulty sectors #3710

Merged
merged 8 commits into from
Sep 11, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/test/window_post.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ func TestWindowPost(t *testing.T, b APIBuilder, blocktime time.Duration, nSector

// Drop the partition
err = parts[0].Sectors.ForEach(func(sid uint64) error {
return miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkFailed(abi.SectorID{
return miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkCorrupted(abi.SectorID{
Miner: abi.ActorID(mid),
Number: abi.SectorNumber(sid),
}, true)
Expand Down
17 changes: 15 additions & 2 deletions extern/sector-storage/ffiwrapper/verifier_cgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,21 @@ func (sb *Sealer) GenerateWindowPoSt(ctx context.Context, minerID abi.ActorID, s
}
defer done()

proof, err := ffi.GenerateWindowPoSt(minerID, privsectors, randomness)
return proof, skipped, err
if len(skipped) > 0 {
return nil, skipped, xerrors.Errorf("pubSectorToPriv skipped some sectors")
}

proof, faulty, err := ffi.GenerateWindowPoSt(minerID, privsectors, randomness)

var faultyIDs []abi.SectorID
for _, f := range faulty {
faultyIDs = append(faultyIDs, abi.SectorID{
Miner: minerID,
Number: f,
})
}

return proof, faultyIDs, err
}

func (sb *Sealer) pubSectorToPriv(ctx context.Context, mid abi.ActorID, sectorInfo []proof.SectorInfo, faults []abi.SectorNumber, rpt func(abi.RegisteredSealProof) (abi.RegisteredPoStProof, error)) (ffi.SortedPrivateSectorInfo, []abi.SectorID, func(), error) {
Expand Down
22 changes: 21 additions & 1 deletion extern/sector-storage/mock/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ const (
type sectorState struct {
pieces []cid.Cid
failed bool
corrupted bool

state int

Expand Down Expand Up @@ -251,6 +252,18 @@ func (mgr *SectorMgr) MarkFailed(sid abi.SectorID, failed bool) error {
return nil
}

// MarkCorrupted sets or clears the corrupted flag on the sector identified
// by sid. The manager lock is held for the duration of the call. An error is
// returned when sid is not present in the manager's sector table.
func (mgr *SectorMgr) MarkCorrupted(sid abi.SectorID, corrupted bool) error {
	mgr.lk.Lock()
	defer mgr.lk.Unlock()

	// Known sector: record the flag and report success.
	if sector, exists := mgr.sectors[sid]; exists {
		sector.corrupted = corrupted
		return nil
	}

	return fmt.Errorf("no such sector in storage")
}

func opFinishWait(ctx context.Context) {
val, ok := ctx.Value("opfinish").(chan struct{})
if !ok {
Expand All @@ -275,6 +288,8 @@ func (mgr *SectorMgr) GenerateWindowPoSt(ctx context.Context, minerID abi.ActorI
si := make([]proof.SectorInfo, 0, len(sectorInfo))
var skipped []abi.SectorID

var err error

for _, info := range sectorInfo {
sid := abi.SectorID{
Miner: minerID,
Expand All @@ -283,13 +298,18 @@ func (mgr *SectorMgr) GenerateWindowPoSt(ctx context.Context, minerID abi.ActorI

_, found := mgr.sectors[sid]

if found && !mgr.sectors[sid].failed {
if found && !mgr.sectors[sid].failed && !mgr.sectors[sid].corrupted {
si = append(si, info)
} else {
skipped = append(skipped, sid)
err = xerrors.Errorf("skipped some sectors")
}
}

if err != nil {
return nil, skipped, err
}

return generateFakePoSt(si, abi.RegisteredSealProof.RegisteredWindowPoStProof, randomness), skipped, nil
}

Expand Down
6 changes: 4 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ module github.com/filecoin-project/lotus

go 1.14

replace github.com/supranational/blst => github.com/supranational/blst v0.1.2-alpha.1

require (
contrib.go.opencensus.io/exporter/jaeger v0.1.0
contrib.go.opencensus.io/exporter/prometheus v0.1.0
Expand Down Expand Up @@ -138,3 +136,7 @@ replace github.com/filecoin-project/filecoin-ffi => ./extern/filecoin-ffi
replace github.com/dgraph-io/badger/v2 => github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200716180832-3ab515320794

replace github.com/filecoin-project/test-vectors => ./extern/test-vectors

replace github.com/supranational/blst => ./extern/fil-blst/blst

replace github.com/filecoin-project/fil-blst => ./extern/fil-blst
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ github.com/filecoin-project/specs-actors v0.9.7 h1:7PAZ8kdqwBdmgf/23FCkQZLCXcVu0
github.com/filecoin-project/specs-actors v0.9.7/go.mod h1:wM2z+kwqYgXn5Z7scV1YHLyd1Q1cy0R8HfTIWQ0BFGU=
github.com/filecoin-project/specs-storage v0.1.1-0.20200907031224-ed2e5cd13796 h1:dJsTPWpG2pcTeojO2pyn0c6l+x/3MZYCBgo/9d11JEk=
github.com/filecoin-project/specs-storage v0.1.1-0.20200907031224-ed2e5cd13796/go.mod h1:nJRRM7Aa9XVvygr3W9k6xGF46RWzr2zxF/iGoAIfA/g=
github.com/filecoin-project/specs-storage v0.1.1-0.20200909213410-c066548422be h1:UX457RrC0LwL7Bb5kd0WFyzJBxbHOCSw/64oYqeT+Zc=
github.com/filecoin-project/specs-storage v0.1.1-0.20200909213410-c066548422be/go.mod h1:nJRRM7Aa9XVvygr3W9k6xGF46RWzr2zxF/iGoAIfA/g=
github.com/filecoin-project/test-vectors/schema v0.0.1 h1:5fNF76nl4qolEvcIsjc0kUADlTMVHO73tW4kXXPnsus=
github.com/filecoin-project/test-vectors/schema v0.0.1/go.mod h1:iQ9QXLpYWL3m7warwvK1JC/pTri8mnfEmKygNDqqY6E=
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
Expand Down
146 changes: 83 additions & 63 deletions storage/wdpost_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,94 +335,114 @@ func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo
Proofs: nil,
}

var sinfos []proof.SectorInfo
sidToPart := map[abi.SectorNumber]uint64{}
skipCount := uint64(0)
postSkipped := bitfield.New()
var postOut []proof.PoStProof

for retries := 0; retries < 5; retries++ {
var sinfos []proof.SectorInfo
sidToPart := map[abi.SectorNumber]int{}

for partIdx, partition := range partitions {
// TODO: Can do this in parallel
toProve, err := partition.ActiveSectors()
if err != nil {
return nil, xerrors.Errorf("getting active sectors: %w", err)
}

for partIdx, partition := range partitions {
// TODO: Can do this in parallel
toProve, err := partition.ActiveSectors()
if err != nil {
return nil, xerrors.Errorf("getting active sectors: %w", err)
}
toProve, err = bitfield.MergeBitFields(toProve, partition.Recoveries)
if err != nil {
return nil, xerrors.Errorf("adding recoveries to set of sectors to prove: %w", err)
}

toProve, err = bitfield.MergeBitFields(toProve, partition.Recoveries)
if err != nil {
return nil, xerrors.Errorf("adding recoveries to set of sectors to prove: %w", err)
}
toProve, err = bitfield.SubtractBitField(toProve, postSkipped)
if err != nil {
return nil, xerrors.Errorf("toProve - postSkipped: %w", err)
}

good, err := s.checkSectors(ctx, toProve)
if err != nil {
return nil, xerrors.Errorf("checking sectors to skip: %w", err)
}
good, err := s.checkSectors(ctx, toProve)
if err != nil {
return nil, xerrors.Errorf("checking sectors to skip: %w", err)
}

skipped, err := bitfield.SubtractBitField(toProve, good)
if err != nil {
return nil, xerrors.Errorf("toProve - good: %w", err)
}
skipped, err := bitfield.SubtractBitField(toProve, good)
if err != nil {
return nil, xerrors.Errorf("toProve - good: %w", err)
}

sc, err := skipped.Count()
if err != nil {
return nil, xerrors.Errorf("getting skipped sector count: %w", err)
}
sc, err := skipped.Count()
if err != nil {
return nil, xerrors.Errorf("getting skipped sector count: %w", err)
}

skipCount += sc
skipCount += sc

ssi, err := s.sectorsForProof(ctx, good, partition.Sectors, ts)
if err != nil {
return nil, xerrors.Errorf("getting sorted sector info: %w", err)
ssi, err := s.sectorsForProof(ctx, good, partition.Sectors, ts)
if err != nil {
return nil, xerrors.Errorf("getting sorted sector info: %w", err)
}

if len(ssi) == 0 {
continue
}

sinfos = append(sinfos, ssi...)
for _, si := range ssi {
sidToPart[si.SectorNumber] = partIdx
}

params.Partitions = append(params.Partitions, miner.PoStPartition{
Index: uint64(partIdx),
Skipped: skipped,
})
}

if len(ssi) == 0 {
continue
if len(sinfos) == 0 {
// nothing to prove..
return nil, errNoPartitions
}

sinfos = append(sinfos, ssi...)
for _, si := range ssi {
sidToPart[si.SectorNumber] = uint64(partIdx)
log.Infow("running windowPost",
"chain-random", rand,
"deadline", di,
"height", ts.Height(),
"skipped", skipCount)

tsStart := build.Clock.Now()

mid, err := address.IDFromAddress(s.actor)
if err != nil {
return nil, err
}

params.Partitions = append(params.Partitions, miner.PoStPartition{
Index: uint64(partIdx),
Skipped: skipped,
})
}
var ps []abi.SectorID
postOut, ps, err = s.prover.GenerateWindowPoSt(ctx, abi.ActorID(mid), sinfos, abi.PoStRandomness(rand))
elapsed := time.Since(tsStart)

if len(sinfos) == 0 {
// nothing to prove..
return nil, errNoPartitions
}
log.Infow("computing window PoSt", "elapsed", elapsed)

log.Infow("running windowPost",
"chain-random", rand,
"deadline", di,
"height", ts.Height(),
"skipped", skipCount)
if err == nil {
break
}

tsStart := build.Clock.Now()
if len(ps) == 0 {
return nil, xerrors.Errorf("running post failed: %w", err)
}

mid, err := address.IDFromAddress(s.actor)
if err != nil {
return nil, err
}
log.Warnw("generate window PoSt skipped sectors", "sectors", ps, "error", err, "try", retries)

postOut, postSkipped, err := s.prover.GenerateWindowPoSt(ctx, abi.ActorID(mid), sinfos, abi.PoStRandomness(rand))
if err != nil {
return nil, xerrors.Errorf("running post failed: %w", err)
skipCount += uint64(len(ps))
for _, sector := range ps {
postSkipped.Set(uint64(sector.Number))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll need to fix Sealer.pubSectorToPriv in ffiwrapper/verifier_cgo.go to actually do this. Instead of simply skipping missing sectors, it needs to replace them with the first non-missing sector.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't pass this bitfield straight to the submitPost message; instead we retry the whole thing (with the loop at L342), assembling the set of sectors from scratch. This bitfield is only used to subtract the sectors we couldn't prove in the previous try (L358).

}
}

if len(postOut) == 0 {
return nil, xerrors.Errorf("received proofs back from generate window post")
return nil, xerrors.Errorf("received no proofs back from generate window post")
}

params.Proofs = postOut

for _, sector := range postSkipped {
params.Partitions[sidToPart[sector.Number]].Skipped.Set(uint64(sector.Number))
}

elapsed := time.Since(tsStart)

commEpoch := di.Open
commRand, err := s.api.ChainGetRandomnessFromTickets(ctx, ts.Key(), crypto.DomainSeparationTag_PoStChainCommit, commEpoch, nil)
if err != nil {
Expand All @@ -431,7 +451,7 @@ func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo
params.ChainCommitEpoch = commEpoch
params.ChainCommitRand = commRand

log.Infow("submitting window PoSt", "elapsed", elapsed)
log.Infow("submitting window PoSt")

return params, nil
}
Expand Down