接Redis Sentinel 源码分析(一)
sentinelTimer函数周期性运行:第一次在服务启动后1ms执行,此后每隔1000/server.hz毫秒执行一次(sentinelTimer函数自身会修改server.hz的值)
sentinelTimer内部包含sentinel模式需要定期执行的操作,包括check master、slave、sentinel的状态,并根据配置的条件判断是否需要fail over。
/*
 * Periodic Sentinel driver: runs the Sentinel housekeeping steps in a fixed
 * order and then re-picks the timer frequency (server.hz).
 */
void sentinelTimer(void) {
    int hz_jitter;

    /* Decide whether TILT mode has to be entered. */
    sentinelCheckTiltCondition();

    /* Periodic work for the monitored instances (checking redis-server
     * state, interacting with the other Sentinel nodes, ...). */
    sentinelHandleDictOfRedisInstances(sentinel.masters);

    /* Script housekeeping: start the scripts waiting to run, clean up the
     * ones that finished, and kill the ones that ran past their timeout. */
    sentinelRunPendingScripts();
    sentinelCollectTerminatedScripts();
    sentinelKillTimedoutScripts();

    /* Change hz, which drives how often the Sentinel operations run. The
     * random offset is there so that Sentinel nodes do not keep issuing
     * their vote requests at the very same moment. */
    hz_jitter = rand() % REDIS_DEFAULT_HZ;
    server.hz = REDIS_DEFAULT_HZ + hz_jitter;
}
sentinelCheckTiltCondition函数会check是否需要进入TILT模式。所谓TILT模式,即只收集数据,而不做fail-over。
进入TILT模式的原因可能是:
1)sentinel的部分操作被阻塞(可能是系统负载导致)
2)系统时钟异常
进入TILT模式的目的是避免错误地进行fail-over
/*
 * Enter TILT mode when the time elapsed since the previous call is
 * implausible: negative, or greater than SENTINEL_TILT_TRIGGER (2 seconds
 * according to the original note). The time of this call is always recorded
 * in sentinel.previous_time for the next comparison.
 */
void sentinelCheckTiltCondition(void) {
    mstime_t current = mstime();
    mstime_t elapsed = current - sentinel.previous_time;
    int out_of_range = (elapsed < 0) || (elapsed > SENTINEL_TILT_TRIGGER);

    if (out_of_range) {
        /* Flag TILT, remember when it started, and emit the "+tilt" event. */
        sentinel.tilt = 1;
        sentinel.tilt_start_time = mstime();
        sentinelEvent(REDIS_WARNING,"+tilt",NULL,"#tilt mode entered");
    }

    sentinel.previous_time = mstime();
}
sentinelHandleDictOfRedisInstances遍历传入字典中的所有instance,并对每个instance执行周期性操作
/*
 * Run the periodic per-instance work on every entry of `instances`.
 *
 * For entries flagged SRI_MASTER the function recurses into that master's
 * slaves and sentinels dictionaries. If a master is found in the
 * SENTINEL_FAILOVER_STATE_UPDATE_CONFIG failover state, the switch to the
 * promoted slave is performed once the iteration is over.
 */
void sentinelHandleDictOfRedisInstances(dict *instances) {
    sentinelRedisInstance *promote_for = NULL;
    dictIterator *iter = dictGetIterator(instances);
    dictEntry *entry;

    for (entry = dictNext(iter); entry != NULL; entry = dictNext(iter)) {
        sentinelRedisInstance *inst = dictGetVal(entry);

        /* Periodic operations for this instance. */
        sentinelHandleRedisInstance(inst);

        /* Only masters carry slaves and sentinels to descend into. */
        if (!(inst->flags & SRI_MASTER)) {
            continue;
        }

        sentinelHandleDictOfRedisInstances(inst->slaves);
        sentinelHandleDictOfRedisInstances(inst->sentinels);

        /* Remember the master whose failover reached the update-config
         * state; it is handled after the loop. */
        if (inst->failover_state == SENTINEL_FAILOVER_STATE_UPDATE_CONFIG) {
            promote_for = inst;
        }
    }

    /* Carry out the switch to the promoted slave, if one was recorded. */
    if (promote_for != NULL) {
        sentinelFailoverSwitchToPromotedSlave(promote_for);
    }
    dictReleaseIterator(iter);
}
sentinelHandleRedisInstance包含了具体的周期性操作,包括针对sentinel、slave、master实例的操作
/*
 * Periodic work for a single instance (master, slave or sentinel).
 * The first part runs for every instance; the final part runs only for
 * instances flagged SRI_MASTER. While TILT mode is active and
 * SENTINEL_TILT_PERIOD has not yet elapsed, the function returns right
 * after the connection and ping steps.
 */
void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
    /* The following steps run for every kind of instance. */
    // Connection and subscription management.
    sentinelReconnectInstance(ri);
    // Talk to the instance (PING/INFO/PUBLISH).
    sentinelPingInstance(ri);
    // While still in TILT mode, do nothing else. Once SENTINEL_TILT_PERIOD
    // has passed since tilt_start_time, leave TILT mode and emit "-tilt".
    if (sentinel.tilt) {
        if (mstime()-sentinel.tilt_start_time < SENTINEL_TILT_PERIOD) return;
        sentinel.tilt = 0;
        sentinelEvent(REDIS_WARNING,"-tilt",NULL,"#tilt mode exited");
    }
    // Check whether the instance is subjectively down (sdown).
    sentinelCheckSubjectivelyDown(ri);
    ......
    /* The following steps apply to master instances only. */
    if (ri->flags & SRI_MASTER) {
        // Check whether the master is objectively down (odown), i.e. whether
        // the user-configured quorum of nodes considers it sdown.
        sentinelCheckObjectivelyDown(ri);
        // Check whether a failover is needed; per the original note, when it
        // is, sentinelStartFailover is called to update this Sentinel's state.
        if (sentinelStartFailoverIfNeeded(ri))
            // Per the original note: sends SENTINEL is-master-down-by-addr to
            // the other Sentinels and registers a callback for the reply.
            sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_ASK_FORCED);
        // Drive the failover.
        sentinelFailoverStateMachine(ri);
        sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_NO_FLAGS);
    }
}
sentinelReconnectInstance函数负责建立连接、重连:和各个instance建立连接,并针对master/slave instance订阅其“__sentinel__:hello”频道
void sentinelReconnectInstance(sentinelRedisInstance *ri) {
if (!(ri->flags & SRI_DISCONNECTED)) return;
//和master/slave/sentinel instance建立连接
if (ri->cc == NULL) {
......
}
//针对master/slave,订阅其“__sentinel