Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go/vt/vtctl/grpcvtctldserver/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9029,7 +9029,7 @@ func TestPlannedReparentShard(t *testing.T) {
WaitReplicasTimeout: protoutil.DurationToProto(time.Millisecond * 10),
},
expectEventsToOccur: true,
expectedErr: "global status vars failed",
expectedErr: "failed to verify tablet zone1-0000000200 is reachable: global status vars failed",
},
}

Expand Down
30 changes: 15 additions & 15 deletions go/vt/vtctl/reparentutil/emergency_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func (erp *EmergencyReparenter) getLockAction(newPrimaryAlias *topodatapb.Tablet
// reparentShardLocked performs Emergency Reparent Shard operation assuming that the shard is already locked
func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, opts EmergencyReparentOptions) (err error) {
// log the starting of the operation and increment the counter
erp.logger.Infof("will initiate emergency reparent shard in keyspace - %s, shard - %s", keyspace, shard)
log.Infof("will initiate emergency reparent shard in keyspace - %s, shard - %s", keyspace, shard)

var (
stoppedReplicationSnapshot *replicationSnapshot
Expand Down Expand Up @@ -180,7 +180,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
return err
}

erp.logger.Infof("Getting a new durability policy for %v", keyspaceDurability)
log.Infof("Getting a new durability policy for %v", keyspaceDurability)
opts.durability, err = policy.GetDurabilityPolicy(keyspaceDurability)
if err != nil {
return err
Expand Down Expand Up @@ -251,7 +251,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
if err != nil {
return err
}
erp.logger.Infof("intermediate source selected - %v", intermediateSource.Alias)
log.Infof("intermediate source selected - %v", intermediateSource.Alias)

// After finding the intermediate source, we want to filter the valid candidate list by the following criteria -
// 1. Only keep the tablets which can make progress after being promoted (have sufficient reachable semi-sync ackers)
Expand All @@ -269,7 +269,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
if err != nil {
return err
}
erp.logger.Infof("intermediate source is ideal candidate- %v", isIdeal)
log.Infof("intermediate source is ideal candidate- %v", isIdeal)

// Check (again) we still have the topology lock.
if err := topo.CheckShardLocked(ctx, keyspace, shard); err != nil {
Expand Down Expand Up @@ -399,7 +399,7 @@ func (erp *EmergencyReparenter) findMostAdvanced(
tabletMap map[string]*topo.TabletInfo,
opts EmergencyReparentOptions,
) (*topodatapb.Tablet, []*topodatapb.Tablet, error) {
erp.logger.Infof("started finding the intermediate source")
log.Infof("started finding the intermediate source")
// convert the valid candidates into a list so that we can use it for sorting
validTablets, tabletPositions, err := getValidCandidatesAndPositionsAsList(validCandidates, tabletMap)
if err != nil {
Expand All @@ -412,7 +412,7 @@ func (erp *EmergencyReparenter) findMostAdvanced(
return nil, nil, err
}
for _, tablet := range validTablets {
erp.logger.Infof("finding intermediate source - sorted replica: %v", tablet.Alias)
log.Infof("finding intermediate source - sorted replica: %v", tablet.Alias)
}

// The first tablet in the sorted list will be the most eligible candidate unless explicitly asked for some other tablet
Expand Down Expand Up @@ -537,18 +537,18 @@ func (erp *EmergencyReparenter) reparentReplicas(
var position string
var err error
if ev.ShardInfo.PrimaryAlias == nil {
erp.logger.Infof("setting up %v as new primary for an uninitialized cluster", alias)
log.Infof("setting up %v as new primary for an uninitialized cluster", alias)
// we call InitPrimary when the PrimaryAlias in the ShardInfo is empty. This happens when we have an uninitialized cluster.
position, err = erp.tmc.InitPrimary(primaryCtx, tablet, policy.SemiSyncAckers(opts.durability, tablet) > 0)
} else {
erp.logger.Infof("starting promotion for the new primary - %v", alias)
log.Infof("starting promotion for the new primary - %v", alias)
// we call PromoteReplica which changes the tablet type, fixes the semi-sync, set the primary to read-write and flushes the binlogs
position, err = erp.tmc.PromoteReplica(primaryCtx, tablet, policy.SemiSyncAckers(opts.durability, tablet) > 0)
}
if err != nil {
return vterrors.Wrapf(err, "primary-elect tablet %v failed to be upgraded to primary: %v", alias, err)
}
erp.logger.Infof("populating reparent journal on new primary %v", alias)
log.Infof("populating reparent journal on new primary %v", alias)
err = erp.tmc.PopulateReparentJournal(primaryCtx, tablet, now, opts.lockAction, tablet.Alias, position)
if err != nil {
return vterrors.Wrapf(err, "failed to PopulateReparentJournal on primary: %v", err)
Expand All @@ -559,7 +559,7 @@ func (erp *EmergencyReparenter) reparentReplicas(

handleReplica := func(alias string, ti *topo.TabletInfo) {
defer replWg.Done()
erp.logger.Infof("setting new primary on replica %v", alias)
log.Infof("setting new primary on replica %v", alias)

forceStart := false
if status, ok := statusMap[alias]; ok {
Expand Down Expand Up @@ -694,7 +694,7 @@ func (erp *EmergencyReparenter) identifyPrimaryCandidate(
) (candidate *topodatapb.Tablet, err error) {
defer func() {
if candidate != nil {
erp.logger.Infof("found better candidate - %v", candidate.Alias)
log.Infof("found better candidate - %v", candidate.Alias)
}
}()

Expand Down Expand Up @@ -747,23 +747,23 @@ func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb
tabletAliasStr := topoproto.TabletAliasString(tablet.Alias)
// Remove tablets which have MustNot promote rule since they must never be promoted
if policy.PromotionRule(opts.durability, tablet) == promotionrule.MustNot {
erp.logger.Infof("Removing %s from list of valid candidates for promotion because it has the Must Not promote rule", tabletAliasStr)
log.Infof("Removing %s from list of valid candidates for promotion because it has the Must Not promote rule", tabletAliasStr)
if opts.NewPrimaryAlias != nil && topoproto.TabletAliasEqual(opts.NewPrimaryAlias, tablet.Alias) {
return nil, vterrors.Errorf(vtrpc.Code_ABORTED, "proposed primary %s has a must not promotion rule", topoproto.TabletAliasString(opts.NewPrimaryAlias))
}
continue
}
// If ERS is configured to prevent cross cell promotions, remove any tablet not from the same cell as the previous primary
if opts.PreventCrossCellPromotion && prevPrimary != nil && tablet.Alias.Cell != prevPrimary.Alias.Cell {
erp.logger.Infof("Removing %s from list of valid candidates for promotion because it isn't in the same cell as the previous primary", tabletAliasStr)
log.Infof("Removing %s from list of valid candidates for promotion because it isn't in the same cell as the previous primary", tabletAliasStr)
if opts.NewPrimaryAlias != nil && topoproto.TabletAliasEqual(opts.NewPrimaryAlias, tablet.Alias) {
return nil, vterrors.Errorf(vtrpc.Code_ABORTED, "proposed primary %s is is a different cell as the previous primary", topoproto.TabletAliasString(opts.NewPrimaryAlias))
}
continue
}
// Remove any tablet which cannot make forward progress using the list of tablets we have reached
if !canEstablishForTablet(opts.durability, tablet, tabletsReachable) {
erp.logger.Infof("Removing %s from list of valid candidates for promotion because it will not be able to make forward progress on promotion with the tablets currently reachable", tabletAliasStr)
log.Infof("Removing %s from list of valid candidates for promotion because it will not be able to make forward progress on promotion with the tablets currently reachable", tabletAliasStr)
if opts.NewPrimaryAlias != nil && topoproto.TabletAliasEqual(opts.NewPrimaryAlias, tablet.Alias) {
return nil, vterrors.Errorf(vtrpc.Code_ABORTED, "proposed primary %s will not be able to make forward progress on being promoted", topoproto.TabletAliasString(opts.NewPrimaryAlias))
}
Expand All @@ -772,7 +772,7 @@ func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb
// Put candidates that are running a backup in a separate list
backingUp, ok := tabletsBackupState[tabletAliasStr]
if ok && backingUp {
erp.logger.Infof("Setting %s in list of valid candidates taking a backup", tabletAliasStr)
log.Infof("Setting %s in list of valid candidates taking a backup", tabletAliasStr)
notPreferredValidTablets = append(notPreferredValidTablets, tablet)
} else {
restrictedValidTablets = append(restrictedValidTablets, tablet)
Expand Down
7 changes: 6 additions & 1 deletion go/vt/vtctl/reparentutil/planned_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -753,10 +753,15 @@ func (pr *PlannedReparenter) verifyAllTabletsReachable(ctx context.Context, tabl
errorGroup, groupCtx := errgroup.WithContext(verifyCtx)
for tblStr, info := range tabletMap {
tablet := info.Tablet
tabletAlias := tblStr
errorGroup.Go(func() error {
statusValues, err := pr.tmc.GetGlobalStatusVars(groupCtx, tablet, []string{InnodbBufferPoolsDataVar})
if err != nil {
return err
if groupCtx.Err() == context.DeadlineExceeded {
return vterrors.Wrapf(err, "timed out verifying tablet %v is reachable (timeout: %v); all tablets must be reachable for PlannedReparent",
tabletAlias, topo.RemoteOperationTimeout)
}
return vterrors.Wrapf(err, "failed to verify tablet %v is reachable", tabletAlias)
}
// We are ignoring the error in conversion because some MySQL variants might not have this
// status variable like MariaDB.
Expand Down
4 changes: 2 additions & 2 deletions go/vt/vtctl/reparentutil/replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func stopReplicationAndBuildStatusMaps(
errChan <- concurrencyErr
}()

logger.Infof("getting replication position from %v", alias)
log.Infof("getting replication position from %v", alias)

stopReplicationStatus, err := tmc.StopReplicationAndGetStatus(groupCtx, tabletInfo.Tablet, replicationdatapb.StopReplicationMode_IOTHREADONLY)
if err != nil {
Expand All @@ -260,7 +260,7 @@ func stopReplicationAndBuildStatusMaps(
msg := "replica %v thinks it's primary but we failed to demote it: %v"
err = vterrors.Wrapf(err, msg, alias, err)

logger.Warningf(msg, alias, err)
log.Warningf(msg, alias, err)
return
}

Expand Down
Loading