sentinelPingInstance 会根据 instance 的当前状态向其发送命令:INFO、PING 或 PUBLISH(每次调用至多发送其中一种)。
/* Send at most one periodic command to the given instance: INFO, PING, or a
 * hello publish, selected from the instance's flags and timestamps.
 *
 * NOTE(review): `now`, `info_period` and `retval` are used below but are not
 * declared in this excerpt; their declarations appear to have been elided. */
void sentinelPingInstance(sentinelRedisInstance *ri) {
    /* Bail out for a disconnected instance, or when the number of pending
     * commands has already reached SENTINEL_MAX_PENDING_COMMANDS. */
    if ((ri->flags & SRI_DISCONNECTED) ||
        ri->pending_commands >= SENTINEL_MAX_PENDING_COMMANDS)
    {
        return;
    }

    /* A slave whose master is flagged SRI_O_DOWN or SRI_FAILOVER_IN_PROGRESS
     * is polled with INFO using a period of 1000 (presumably milliseconds)
     * instead of SENTINEL_INFO_PERIOD. The && keeps ri->master from being
     * dereferenced for non-slave instances. */
    info_period = ((ri->flags & SRI_SLAVE) &&
                   (ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)))
                  ? 1000
                  : SENTINEL_INFO_PERIOD;

    /* INFO goes to masters and slaves only, never to sentinels: either no
     * INFO has been recorded yet (info_refresh == 0) or the last one is older
     * than info_period. The reply is handed to sentinelInfoReplyCallback
     * (per the original notes, it parses the reply and refreshes the
     * instance state). */
    if (!(ri->flags & SRI_SENTINEL) &&
        (ri->info_refresh == 0 || (now - ri->info_refresh) > info_period))
    {
        retval = redisAsyncCommand(ri->cc,
                                   sentinelInfoReplyCallback, NULL, "INFO");
        if (retval == REDIS_OK) ri->pending_commands++;
        return;
    }

    /* PING goes to every kind of instance once the last PONG is older than
     * SENTINEL_PING_PERIOD. The reply is handed to sentinelPingReplyCallback
     * (per the original notes, it judges the instance state from the reply). */
    if ((now - ri->last_pong_time) > SENTINEL_PING_PERIOD) {
        retval = redisAsyncCommand(ri->cc,
                                   sentinelPingReplyCallback, NULL, "PING");
        if (retval == REDIS_OK) ri->pending_commands++;
        return;
    }

    /* Otherwise, for masters and slaves, send a hello once the last publish
     * is older than SENTINEL_PUBLISH_PERIOD. Per the original notes, the
     * message is published on the "__sentinel__:hello" channel and carries
     * ip, port, runid, current_epoch, master->name, master->ip, master->port
     * -- not verifiable from this excerpt. */
    if (!(ri->flags & SRI_SENTINEL) &&
        (now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD)
    {
        sentinelSendHello(ri);
    }
}
sentinelCheckObjectivelyDown 函数用于判断是否应将 master 的状态从 sdown 升级为 odown。
/* Decide whether a master already flagged SRI_S_DOWN should be considered
 * objectively down, by counting the sentinels that also report it down.
 *
 * NOTE(review): parts of the body are elided ("......") in this excerpt, so
 * the declarations of quorum/odown/di/de and whatever is done with `odown`
 * afterwards are not visible here. */
void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
......
// Only when the master is already flagged subjectively down (SRI_S_DOWN)
// do we check whether it can be considered objectively down.
if (master->flags & SRI_S_DOWN) {
// The count starts at 1 -- presumably this sentinel's own vote; confirm.
quorum = 1;
di = dictGetIterator(master->sentinels);
// Walk the master's sentinels dictionary and count every sentinel whose
// flags include SRI_MASTER_DOWN.
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
if (ri->flags & SRI_MASTER_DOWN) quorum++;
}
dictReleaseIterator(di);
// Once the count reaches master->quorum (described in the original notes
// as the user-configured quorum), mark the result as odown.
if (quorum >= master->quorum) odown = 1;
}
......
}
/* Start a failover for `master` when all preconditions hold.
 *
 * Preconditions: the master is flagged SRI_O_DOWN, no failover is currently
 * in progress for it, and at least failover_timeout*2 has elapsed since
 * master->failover_start_time.
 *
 * Returns 1 if sentinelStartFailover() was called, 0 otherwise. */
int sentinelStartFailoverIfNeeded(sentinelRedisInstance *master) {
    int is_odown = (master->flags & SRI_O_DOWN) != 0;
    int failover_running = (master->flags & SRI_FAILOVER_IN_PROGRESS) != 0;

    /* The master must be objectively down with no failover already running. */
    if (!is_odown || failover_running) {
        return 0;
    }

    /* Refuse to start again while still inside the 2 * failover_timeout
     * window that began at the previous failover start time. */
    if (mstime() - master->failover_start_time < master->failover_timeout*2) {
        return 0;
    }

    sentinelStartFailover(master);
    return 1;
}
在确认需要进行 failover 之后,调用 sentinelStartFailover 修改相关的状态数据:
void sentinelStartFailover(sentinelRedisInstance *master) {
redisAssert(master->flags & SRI_MASTER);
// 设置 failover 状态
master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
// 设置master当前状态
master->flags |= SRI_FAILOVER_IN_PROGRESS;
// 设置failover_epoch
master->failover_epoch = ++sentinel.c