*: log format for server (tikv#1431)
* *: log format for server

Signed-off-by: nolouch <[email protected]>
nolouch committed Jul 10, 2019
1 parent 1ff703c commit caa98c4
Showing 15 changed files with 203 additions and 111 deletions.
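The visible hunks all follow the same mechanical conversion: printf-style logrus calls (log.Warnf, log.Infof, log.Errorf, log.Debugf) are replaced with the zap-backed github.com/pingcap/log package, where the message is a constant string and all variable data moves into typed zap fields. A minimal, self-contained sketch of the before/after pattern (store values are made up for illustration; this is not code from the patch):

    package main

    import (
        log "github.com/pingcap/log"
        "go.uber.org/zap"
    )

    func main() {
        // Hypothetical values, for illustration only.
        storeID := uint64(1)
        addr := "127.0.0.1:20160"

        // Before (logrus, removed by this patch):
        //   log.Warnf("[store %d] store %s has been Offline", storeID, addr)

        // After (pingcap/log wrapping zap): a constant message plus typed fields,
        // so every log entry carries structured key/value data.
        log.Warn("store has been offline",
            zap.Uint64("store-id", storeID),
            zap.String("store-address", addr))
    }

The practical gain is that the message string stays constant while the fields vary, which keeps log lines greppable and lets downstream tooling index the structured fields.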
39 changes: 27 additions & 12 deletions server/cluster.go
@@ -21,12 +21,13 @@ import (

"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/kvproto/pkg/pdpb"
log "github.com/pingcap/log"
errcode "github.com/pingcap/pd/pkg/error_code"
"github.com/pingcap/pd/pkg/logutil"
"github.com/pingcap/pd/server/core"
"github.com/pingcap/pd/server/namespace"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"go.uber.org/zap"
)

var backgroundJobInterval = time.Minute
@@ -354,7 +355,9 @@ func (c *RaftCluster) putStore(store *metapb.Store) error {
// Check location labels.
for _, k := range c.cachedCluster.GetLocationLabels() {
if v := s.GetLabelValue(k); len(v) == 0 {
log.Warnf("missing location label %q in store %v", k, s)
log.Warn("missing location label",
zap.Stringer("store", s.Store),
zap.String("label-key", k))
}
}
return cluster.putStore(s)
@@ -384,7 +387,9 @@ func (c *RaftCluster) RemoveStore(storeID uint64) error {
}

store.State = metapb.StoreState_Offline
log.Warnf("[store %d] store %s has been Offline", store.GetId(), store.GetAddress())
log.Warn("store has been offline",
zap.Uint64("store-id", store.GetId()),
zap.String("store-address", store.GetAddress()))
return cluster.putStore(store)
}

@@ -412,11 +417,12 @@ func (c *RaftCluster) BuryStore(storeID uint64, force bool) error { // revive:di
if !force {
return errors.New("store is still up, please remove store gracefully")
}
log.Warnf("forcedly bury store %v", store)
log.Warn("forcedly bury store", zap.Stringer("store", store.Store))
}

store.State = metapb.StoreState_Tombstone
log.Warnf("[store %d] store %s has been Tombstone", store.GetId(), store.GetAddress())
log.Warn("store has been Tombstone",
zap.Uint64("store-id", store.GetId()),
zap.String("store-address", store.GetAddress()))
return cluster.putStore(store)
}

@@ -433,7 +439,9 @@ func (c *RaftCluster) SetStoreState(storeID uint64, state metapb.StoreState) err
}

store.State = state
log.Warnf("[store %d] set state to %v", storeID, state.String())
log.Warn("store update state",
zap.Uint64("store-id", storeID),
zap.Stringer("new-state", state))
return cluster.putStore(store)
}

@@ -476,7 +484,9 @@ func (c *RaftCluster) checkStores() {
// If the store is empty, it can be buried.
if cluster.getStoreRegionCount(offlineStore.GetId()) == 0 {
if err := c.BuryStore(offlineStore.GetId(), false); err != nil {
log.Errorf("bury store %v failed: %v", offlineStore, err)
log.Error("bury store failed",
zap.Stringer("store", offlineStore),
zap.Error(err))
}
} else {
offlineStores = append(offlineStores, offlineStore)
@@ -489,7 +499,7 @@ func (c *RaftCluster) checkStores() {

if upStoreCount < c.s.GetConfig().Replication.MaxReplicas {
for _, offlineStore := range offlineStores {
log.Warnf("store %v may not turn into Tombstone, there are no extra up node has enough space to accommodate the extra replica", offlineStore)
log.Warn("store may not turn into Tombstone, there are no extra up node has enough space to accommodate the extra replica", zap.Stringer("store", offlineStore))
}
}
}
@@ -500,13 +510,18 @@ func (c *RaftCluster) checkOperators() {
// after region is merged, it will not heartbeat anymore
// the operator of merged region will not timeout actively
if c.cachedCluster.GetRegion(op.RegionID()) == nil {
log.Debugf("remove operator %v cause region %d is merged", op, op.RegionID())
log.Debug("remove operator cause region is merged",
zap.Uint64("region-id", op.RegionID()),
zap.Stringer("operator", op))
co.removeOperator(op)
continue
}

if op.IsTimeout() {
log.Infof("[region %v] operator timeout: %s", op.RegionID(), op)
log.Info("operator timeout",
zap.Uint64("region-id", op.RegionID()),
zap.Stringer("operator", op))

operatorCounter.WithLabelValues(op.Desc(), "timeout").Inc()
co.removeOperator(op)
}
@@ -531,7 +546,7 @@ func (c *RaftCluster) collectHealthStatus() {
client := c.s.GetClient()
members, err := GetMembers(client)
if err != nil {
log.Info("get members error:", err)
log.Error("get members error", zap.Error(err))
}
unhealth := c.s.CheckHealth(members)
for _, member := range members {
56 changes: 44 additions & 12 deletions server/cluster_info.go
@@ -21,10 +21,11 @@ import (
"github.com/gogo/protobuf/proto"
"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/kvproto/pkg/pdpb"
log "github.com/pingcap/log"
"github.com/pingcap/pd/server/core"
"github.com/pingcap/pd/server/namespace"
"github.com/pingcap/pd/server/schedule"
log "github.com/sirupsen/logrus"
"go.uber.org/zap"
)

type clusterInfo struct {
@@ -68,13 +69,19 @@ func loadClusterInfo(id core.IDAllocator, kv *core.KV, opt *scheduleOption) (*cl
if err := kv.LoadStores(c.core.Stores); err != nil {
return nil, err
}
log.Infof("load %v stores cost %v", c.core.Stores.GetStoreCount(), time.Since(start))
log.Info("load stores",
zap.Int("count", c.core.Stores.GetStoreCount()),
zap.Duration("cost", time.Since(start)),
)

start = time.Now()
if err := kv.LoadRegions(c.core.Regions); err != nil {
return nil, err
}
log.Infof("load %v regions cost %v", c.core.Regions.GetRegionCount(), time.Since(start))
log.Info("load regions",
zap.Int("count", c.core.Regions.GetRegionCount()),
zap.Duration("cost", time.Since(start)),
)

return c, nil
}
@@ -101,9 +108,11 @@ func (c *clusterInfo) OnStoreVersionChange() {
c.opt.SetClusterVersion(*minVersion)
err := c.opt.persist(c.kv)
if err != nil {
log.Infof("persist cluster version meet error: %s", err)
log.Error("persist cluster version meet error", zap.Error(err))
}
log.Infof("cluster version changed from %s to %s", clusterVersion, minVersion)
log.Info("cluster version changed",
zap.Stringer("old-cluster-version", clusterVersion),
zap.Stringer("new-cluster-version", minVersion))
CheckPDVersion(c.opt)
}
}
@@ -123,7 +132,7 @@ func (c *clusterInfo) allocID() (uint64, error) {
func (c *clusterInfo) AllocPeer(storeID uint64) (*metapb.Peer, error) {
peerID, err := c.allocID()
if err != nil {
log.Errorf("failed to alloc peer: %v", err)
log.Error("failed to alloc peer", zap.Error(err))
return nil, err
}
peer := &metapb.Peer{
@@ -471,7 +480,10 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error {
// Mark isNew if the region in cache does not have leader.
var saveKV, saveCache, isNew bool
if origin == nil {
log.Debugf("[region %d] Insert new region {%v}", region.GetID(), core.HexRegionMeta(region.GetMeta()))
log.Debug("insert new region",
zap.Uint64("region-id", region.GetID()),
zap.Reflect("meta-region", core.HexRegionMeta(region.GetMeta())),
)
saveKV, saveCache, isNew = true, true, true
} else {
r := region.GetRegionEpoch()
@@ -481,18 +493,32 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error {
return ErrRegionIsStale(region.GetMeta(), origin.GetMeta())
}
if r.GetVersion() > o.GetVersion() {
log.Infof("[region %d] %s, Version changed from {%d} to {%d}", region.GetID(), core.DiffRegionKeyInfo(origin, region), o.GetVersion(), r.GetVersion())
log.Info("region Version changed",
zap.Uint64("region-id", region.GetID()),
zap.String("detail", core.DiffRegionKeyInfo(origin, region)),
zap.Uint64("old-version", o.GetVersion()),
zap.Uint64("new-version", r.GetVersion()),
)
saveKV, saveCache = true, true
}
if r.GetConfVer() > o.GetConfVer() {
log.Infof("[region %d] %s, ConfVer changed from {%d} to {%d}", region.GetID(), core.DiffRegionPeersInfo(origin, region), o.GetConfVer(), r.GetConfVer())
log.Info("region ConfVer changed",
zap.Uint64("region-id", region.GetID()),
zap.String("detail", core.DiffRegionPeersInfo(origin, region)),
zap.Uint64("old-confver", o.GetConfVer()),
zap.Uint64("new-confver", r.GetConfVer()),
)
saveKV, saveCache = true, true
}
if region.GetLeader().GetId() != origin.GetLeader().GetId() {
if origin.GetLeader().GetId() == 0 {
isNew = true
} else {
log.Infof("[region %d] Leader changed from store {%d} to {%d}", region.GetID(), origin.GetLeader().GetStoreId(), region.GetLeader().GetStoreId())
log.Info("leader changed",
zap.Uint64("region-id", region.GetID()),
zap.Uint64("from", origin.GetLeader().GetStoreId()),
zap.Uint64("to", region.GetLeader().GetStoreId()),
)
}
saveCache = true
}
@@ -517,7 +543,10 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error {
if err := c.kv.SaveRegion(region.GetMeta()); err != nil {
// Not successfully saved to kv is not fatal, it only leads to longer warm-up
// after restart. Here we only log the error then go on updating cache.
log.Errorf("[region %d] fail to save region %v: %v", region.GetID(), core.HexRegionMeta(region.GetMeta()), err)
log.Error("fail to save region to kv",
zap.Uint64("region-id", region.GetID()),
zap.Reflect("region-meta", core.HexRegionMeta(region.GetMeta())),
zap.Error(err))
}
}
if !isWriteUpdate && !isReadUpdate && !saveCache && !isNew {
@@ -535,7 +564,10 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error {
if c.kv != nil {
for _, item := range overlaps {
if err := c.kv.DeleteRegion(item); err != nil {
log.Errorf("[region %d] fail to delete region %v: %v", item.GetId(), core.HexRegionMeta(item), err)
log.Error("fail to delete region from kv",
zap.Uint64("region-id", item.GetId()),
zap.Reflect("region-meta", core.HexRegionMeta(item)),
zap.Error(err))
}
}
}
23 changes: 17 additions & 6 deletions server/cluster_worker.go
@@ -19,9 +19,10 @@ import (
"github.com/gogo/protobuf/proto"
"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/kvproto/pkg/pdpb"
log "github.com/pingcap/log"
"github.com/pingcap/pd/server/core"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"go.uber.org/zap"
)

// HandleRegionHeartbeat processes RegionInfo reports from client.
@@ -32,7 +33,7 @@ func (c *RaftCluster) HandleRegionHeartbeat(region *core.RegionInfo) error {

// If the region peer count is 0, then we should not handle this.
if len(region.GetPeers()) == 0 {
log.Warnf("invalid region, zero region peer count: %v", core.HexRegionMeta(region.GetMeta()))
log.Warn("invalid region, zero region peer count", zap.Reflect("region-meta", core.HexRegionMeta(region.GetMeta())))
return errors.Errorf("invalid region, zero region peer count: %v", core.HexRegionMeta(region.GetMeta()))
}

@@ -163,15 +164,20 @@ func (c *RaftCluster) handleReportSplit(request *pdpb.ReportSplitRequest) (*pdpb

err := c.checkSplitRegion(left, right)
if err != nil {
log.Warnf("report split region is invalid: %v, %v, %v", core.HexRegionMeta(left), core.HexRegionMeta(right), err)
log.Warn("report split region is invalid",
zap.Reflect("left-region", core.HexRegionMeta(left)),
zap.Reflect("right-region", core.HexRegionMeta(right)),
zap.Error(err))
return nil, err
}

// Build origin region by using left and right.
originRegion := proto.Clone(right).(*metapb.Region)
originRegion.RegionEpoch = nil
originRegion.StartKey = left.GetStartKey()
log.Infof("[region %d] region split, generate new region: %v", originRegion.GetId(), core.HexRegionMeta(left))
log.Info("region split, generate new region",
zap.Uint64("region-id", originRegion.GetId()),
zap.Reflect("region-meta", core.HexRegionMeta(left)))
return &pdpb.ReportSplitResponse{}, nil
}

@@ -184,11 +190,16 @@ func (c *RaftCluster) handleBatchReportSplit(request *pdpb.ReportBatchSplitReque

err := c.checkSplitRegions(regions)
if err != nil {
log.Warnf("report batch split region is invalid: %v, %v", hexRegionMetas, err)
log.Warn("report batch split region is invalid",
zap.Reflect("region-meta", hexRegionMetas),
zap.Error(err))
return nil, err
}
last := len(regions) - 1
originRegion := proto.Clone(regions[last]).(*metapb.Region)
log.Infof("[region %d] region split, generate %d new regions: %v", originRegion.GetId(), last, hexRegionMetas[:last])
log.Info("region batch split, generate new regions",
zap.Uint64("region-id", originRegion.GetId()),
zap.Reflect("origin", hexRegionMetas[:last]),
zap.Int("total", last))
return &pdpb.ReportBatchSplitResponse{}, nil
}