业务最初的应用场景中, 我们也许使用单机 Redis 就可以应付业务要求, 但并非一直可行.
比如单机的读写能力问题, 单机的可用性问题, 单机的数据安全性问题. 这些都是许多互联网应用经常会遇到的问题, 也基本上都有一套理论去解决它, 只是百花齐放.
哨兵是 Redis 中解决高可用问题的解决方案之一, 我们就一起来看看 Redis 是如何实现的吧! 不过此方案, 仅提供思路供参考, 不要以此为标准方案.
前面介绍的主从复制功能, 可以说已经在一定程度上解决了数据安全性问题, 即有了备份数据, 我们也可以做读写分离了. 只是, 可用性问题还未解决, 即当 master 宕机或出现其他故障时, 整个写服务就不可用了. 解决方法是, 手动操作, 要么重启 master 使其恢复服务, 要么把 master 切换到其他 slave 机器上.
如果服务的可用性需要人工介入的话, 那就算不得高可用了, 所以我们需要一个自动处理机制. 这就是哨兵模式.
一, 哨兵系统介绍
哨兵系统要解决的问题核心, 自然是高可用问题. 而如何解决, 则是其设计问题. 而最终呈现给用户的, 应该是一个个的功能单元, 即其提供的能力. 如下:
监控 (Monitoring): Sentinel 会不断地检查你的主服务器和从服务器是否运作正常.
提醒 (Notification): 当被监控的某个 Redis 服务器出现问题时, Sentinel 可以通过 API 向管理员或者其他应用程序发送通知.
自动故障迁移 (Automatic failover): 当一个主服务器不能正常工作时, Sentinel 会开始一次自动故障迁移操作, 它会将失效主服务器的其中一个从服务器升级为新的主服务器, 并让失效主服务器的其他从服务器改为复制新的主服务器.
- # Example sentinel.conf
- # 定义 sentinel 服务端口号
- port 26379
- # 针对 使用端口映射的方式的启动, 指定 ip:port
- # sentinel announce-ip <ip>
- # sentinel announce-port <port>
- # 工作目录定义
- dir /tmp
- # 要监视的 Redis master 定义, 可配置多个 master-name 不同即可
- # sentinel monitor <master-name> <ip> <Redis-port> <quorum>
- sentinel monitor mymaster 127.0.0.1 6379 2
- # 定义 master/slave 的密码, 要求同一主从服务所有密码必须保持一致
- # sentinel auth-pass <master-name> <password>
- # 定义 master 不可达持续多少毫秒后开始定义为节点下线, 默认 30s
- sentinel down-after-milliseconds mymaster 30000
- # sentinel parallel-syncs <master-name> <numslaves>
- # 在故障转移期间同时与新的 master 同步的 slave 数量
- sentinel parallel-syncs mymaster 1
- # 定义进行故障转移的超时时间, 默认 3 分钟
- sentinel failover-timeout mymaster 180000
- # 发生故障转移时调用的通知脚本, 被调用时会传递两个参数: eventType, eventDescription
- # sentinel notification-script mymaster /var/Redis/notify.sh
- # master 变更时调用脚本配置
- # 调用时会传递如下参数
- # <master-name> <role> <state> <from-ip> <from-port> <to-ip> <to-port>
- # sentinel client-reconfig-script mymaster /var/Redis/reconfig.sh
- # 使用 redis-sentinel 程序启动, 这个程序不一定会有, 需要自己编译
- redis-sentinel /path/to/sentinel.conf
- # 使用 redis-server 程序启动, 一定可用
- # 测试时可以加上 --protected-mode no, 在不设置密码情况下访问 Redis
- redis-server /path/to/sentinel.conf --sentinel
- redis-cli -p 26379 # 连接到 sentinel
- info sentinel # 查看哨兵信息
- SENTINEL slaves mymaster # 查看 master 下的 slave 服务器情况
- SENTINEL sentinels mymaster # 查看 master 的哨兵服务器列表
- SENTINEL get-master-addr-by-name mymaster # 获取 master 地址信息
- <dependency>
- <groupId>redis.clients</groupId>
- <artifactId>jedis</artifactId>
- <version>2.9.0</version>
- </dependency>
- public class RedisSentinelTest {
- @Test
- public void testSentinel() throws Exception {
- String masterName = "mymaster";
- // 只需设置 sentinel 信息, 真实的 Redis 实例信息由 sentinel 提供
- Set<String> sentinels = new HashSet<>();
- sentinels.add("127.0.0.1:26379");
- sentinels.add("127.0.0.1:26378");
- sentinels.add("127.0.0.1:26377");
- JedisSentinelPool pool = new JedisSentinelPool(masterName, sentinels);
- Jedis jedis = pool.getResource();
- String key = "key1";
- String value = "Value1";
- // set get 测试哨兵系统是否可用
- jedis.set(key, value);
- System.out.println("set a value to Redis over." + key + "->" + value);
- value = jedis.get("key1");
- System.out.println("get a value from Redis over." + key + "->" + value);
- pool.close();
- }
- }
- /*
- * redis.clients.jedis.JedisSentinelPool#JedisSentinelPool
- *
- * Constructor chain. The shorter overloads only fill in defaults and delegate
- * to the eight-argument constructor at the end of the chain:
- *   (masterName, sentinels) supplies a new GenericObjectPoolConfig,
- *     Protocol.DEFAULT_TIMEOUT, a null password and Protocol.DEFAULT_DATABASE;
- *   the six-argument form passes its single timeout as both the connection
- *     timeout and the socket timeout;
- *   the seven-argument form passes null as the client name.
- * The final constructor stores the settings, resolves the master through
- * initSentinels and then calls initPool with that master.
- */
- public JedisSentinelPool(String masterName, Set<String> sentinels) {
- this(masterName, sentinels, new GenericObjectPoolConfig(), Protocol.DEFAULT_TIMEOUT, null,
- Protocol.DEFAULT_DATABASE);
- }
- public JedisSentinelPool(String masterName, Set<String> sentinels,
- final GenericObjectPoolConfig poolConfig, int timeout, final String password,
- final int database) {
- this(masterName, sentinels, poolConfig, timeout, timeout, password, database);
- }
- public JedisSentinelPool(String masterName, Set<String> sentinels,
- final GenericObjectPoolConfig poolConfig, final int timeout, final int soTimeout,
- final String password, final int database) {
- this(masterName, sentinels, poolConfig, timeout, soTimeout, password, database, null);
- }
- public JedisSentinelPool(String masterName, Set<String> sentinels,
- final GenericObjectPoolConfig poolConfig, final int connectionTimeout, final int soTimeout,
- final String password, final int database, final String clientName) {
- this.poolConfig = poolConfig;
- this.connectionTimeout = connectionTimeout;
- this.soTimeout = soTimeout;
- this.password = password;
- this.database = database;
- this.clientName = clientName;
- // Ask the sentinels for the current master address (the key step)
- HostAndPort master = initSentinels(sentinels, masterName);
- // Initialise the connection pool for that master (not the focus of this article)
- initPool(master);
- }
- private HostAndPort initSentinels(Set<String> sentinels, final String masterName) { // returns the master address reported by the first sentinel that yields one; throws if none does
- HostAndPort master = null;
- boolean sentinelAvailable = false;
- log.info("Trying to find master from available Sentinels...");
- /*
- * Walk through the configured sentinels one by one and stop at the first
- * one that returns a two-element master address.
- */
- for (String sentinel : sentinels) {
- final HostAndPort hap = HostAndPort.parseString(sentinel);
- log.fine("Connecting to Sentinel" + hap);
- Jedis jedis = null;
- try {
- jedis = new Jedis(hap.getHost(), hap.getPort());
- // Send "SENTINEL get-master-addr-by-name <masterName>" to this sentinel to obtain the master address
- List<String> masterAddr = jedis.sentinelGetMasterAddrByName(masterName);
- // connected to sentinel...
- sentinelAvailable = true;
- if (masterAddr == null || masterAddr.size() != 2) {
- log.warning("Can not get master addr, master name:" + masterName + ". Sentinel:" + hap
- + ".");
- continue;
- }
- master = toHostAndPort(masterAddr);
- log.fine("Found Redis master at" + master);
- break;
- } catch (JedisException e) {
- // resolves #1036, it should handle JedisException there's another chance
- // of raising JedisDataException
- log.warning("Cannot get master address from sentinel running @" + hap + ". Reason:" + e
- + ". Trying next one.");
- } finally {
- if (jedis != null) {
- jedis.close();
- }
- }
- }
- if (master == null) {
- if (sentinelAvailable) {
- // can connect to sentinel, but master name seems to not
- // monitored
- throw new JedisException("Can connect to sentinel, but" + masterName // NOTE(review): the literals have no space around masterName, so the message runs the words together
- + "seems to be not monitored...");
- } else {
- throw new JedisConnectionException("All sentinels down, cannot determine where is"
- + masterName + "master is running...");
- }
- }
- log.info("Redis master running at" + master + ", starting Sentinel listeners...");
- // Start one listener thread per sentinel, listening for that sentinel's +switch-master messages.
- // When the master changes, the listener re-initialises the connection pool.
- for (String sentinel : sentinels) {
- final HostAndPort hap = HostAndPort.parseString(sentinel);
- MasterListener masterListener = new MasterListener(masterName, hap.getHost(), hap.getPort());
- // whether MasterListener threads are alive or not, process can be stopped
- masterListener.setDaemon(true);
- masterListeners.add(masterListener);
- masterListener.start();
- }
- return master;
- }
- // Processing loop executed by each per-sentinel listener thread:
- /*
- * redis.clients.jedis.JedisSentinelPool.MasterListener#run
- *
- * While the running flag is set, opens a connection to the sentinel at
- * host:port and subscribes to the "+switch-master" channel. Each received
- * message is split on spaces; when its first field equals masterName,
- * initPool is called with the address built from fields 3 and 4.
- * On a JedisConnectionException while still running, the error is logged,
- * the thread sleeps for subscribeRetryWaitTimeMillis and the loop retries.
- * The connection is closed in the finally clause on every iteration.
- */
- @Override
- public void run() {
- running.set(true);
- while (running.get()) {
- j = new Jedis(host, port);
- try {
- // double check that it is not being shutdown
- if (!running.get()) {
- break;
- }
- // SUBSCRIBE +switch-master
- j.subscribe(new JedisPubSub() {
- @Override
- public void onMessage(String channel, String message) {
- log.fine("Sentinel" + host + ":" + port + "published:" + message + ".");
- String[] switchMasterMsg = message.split(" ");
- // Message format: masterName xx xx masterHost masterPort
- if (switchMasterMsg.length> 3) { // NOTE(review): this guard admits length == 4, yet index 4 is read below
- if (masterName.equals(switchMasterMsg[0])) {
- initPool(toHostAndPort(Arrays.asList(switchMasterMsg[3], switchMasterMsg[4])));
- } else {
- log.fine("Ignoring message on +switch-master for master name"
- + switchMasterMsg[0] + ", our master name is" + masterName);
- }
- } else {
- log.severe("Invalid message received on Sentinel" + host + ":" + port
- + "on channel +switch-master:" + message);
- }
- }
- }, "+switch-master");
- } catch (JedisConnectionException e) {
- if (running.get()) {
- log.log(Level.SEVERE, "Lost connection to Sentinel at" + host + ":" + port
- + ". Sleeping 5000ms and retrying.", e); // NOTE(review): the text hard-codes 5000ms; the actual delay is subscribeRetryWaitTimeMillis
- try {
- Thread.sleep(subscribeRetryWaitTimeMillis);
- } catch (InterruptedException e1) {
- log.log(Level.SEVERE, "Sleep interrupted:", e1); // NOTE(review): the interrupt status is not restored here
- }
- } else {
- log.fine("Unsubscribing from Sentinel at" + host + ":" + port);
- }
- } finally {
- j.close();
- }
- }
- }
- /*
- * redis.clients.jedis.JedisSentinelPool#initPool
- *
- * Points the pool at the given master, but only when it differs from
- * currentHostMaster. On the first call (factory == null) a JedisFactory is
- * built for the master and the internal pool is created from it. On later
- * calls the existing factory is retargeted and the internal pool is cleared.
- */
- private void initPool(HostAndPort master) {
- if (!master.equals(currentHostMaster)) {
- currentHostMaster = master;
- if (factory == null) {
- factory = new JedisFactory(master.getHost(), master.getPort(), connectionTimeout,
- soTimeout, password, database, clientName, false, null, null, null);
- initPool(poolConfig, factory);
- } else {
- factory.setHostAndPort(currentHostMaster);
- // although we clear the pool, we still have to check the
- // returned object
- // in getResource, this call only clears idle instances, not
- // borrowed instances
- internalPool.clear();
- }
- log.info("Created JedisPool to master at" + master);
- }
- }
- /*
- * redis.clients.util.Pool#initPool
- *
- * Replaces the internal pool with a new GenericObjectPool built from the
- * given factory and configuration. An existing internal pool is closed first.
- */
- public void initPool(final GenericObjectPoolConfig poolConfig, PooledObjectFactory<T> factory) {
- if (this.internalPool != null) {
- try {
- closeInternalPool();
- } catch (Exception e) { // a failure while closing the old pool is ignored; the new pool is created regardless
- }
- }
- this.internalPool = new GenericObjectPool<T>(factory, poolConfig);
- }
- /*
- * redis.clients.jedis.JedisSentinelPool#getResource
- *
- * Borrows connections from the superclass pool until one is connected to the
- * host and port of the current master, and returns that one. Connections that
- * point elsewhere are handed back through returnBrokenResource.
- */
- @Override
- public Jedis getResource() {
- while (true) {
- // Borrow an instance through the superclass method
- Jedis jedis = super.getResource();
- jedis.setDataSource(this);
- // get a reference because it can change concurrently
- final HostAndPort master = currentHostMaster;
- final HostAndPort connection = new HostAndPort(jedis.getClient().getHost(), jedis.getClient()
- .getPort());
- // Compare host:port; a match with the current master means this is the right connection, so return it
- if (master.equals(connection)) {
- // connected to the correct master
- return jedis;
- }
- // Otherwise the master has been switched: give this connection back as broken and try again
- else {
- returnBrokenResource(jedis);
- }
- }
- }
- /*
- * redis.clients.util.Pool#getResource
- *
- * Borrows an object from internalPool. A NoSuchElementException is rethrown
- * as JedisException and any other Exception as JedisConnectionException,
- * each with the original exception as its cause.
- */
- public T getResource() {
- try {
- return internalPool.borrowObject();
- } catch (NoSuchElementException nse) {
- throw new JedisException("Could not get a resource from the pool", nse);
- } catch (Exception e) {
- throw new JedisConnectionException("Could not get a resource from the pool", e);
- }
- }
- /*
- * org.apache.commons.pool2.impl.GenericObjectPool#borrowObject()
- *
- * Delegates to borrowObject(long), passing getMaxWaitMillis() as the wait limit.
- */
- @Override
- public T borrowObject() throws Exception {
- return borrowObject(getMaxWaitMillis());
- }
- /*
- * org.apache.commons.pool2.impl.GenericObjectPool#borrowObject(long)
- *
- * Borrows one object from the pool. Each loop iteration first polls the idle
- * queue and, if that is empty, tries create(). If there is still no object
- * and blockWhenExhausted is set, it waits on the idle queue: indefinitely when
- * borrowMaxWaitMillis is negative, otherwise for at most that many
- * milliseconds. A NoSuchElementException is thrown on timeout, or immediately
- * when the pool is exhausted and blocking is disabled.
- *
- * The candidate is then allocated, activated through the factory and, when
- * testOnBorrow (or testOnCreate for a new object) applies, validated. An
- * object that fails activation or validation is destroyed and the loop
- * retries; if the failing object was newly created, NoSuchElementException
- * is thrown with the failure as its cause.
- */
- public T borrowObject(final long borrowMaxWaitMillis) throws Exception {
- assertOpen();
- final AbandonedConfig ac = this.abandonedConfig;
- if (ac != null && ac.getRemoveAbandonedOnBorrow() &&
- (getNumIdle() <2) &&
- (getNumActive()> getMaxTotal() - 3) ) {
- removeAbandoned(ac);
- }
- PooledObject<T> p = null;
- // Get local copy of current config so it is consistent for entire
- // method execution
- final boolean blockWhenExhausted = getBlockWhenExhausted();
- boolean create;
- final long waitTime = System.currentTimeMillis();
- while (p == null) {
- create = false;
- p = idleObjects.pollFirst();
- if (p == null) {
- // No idle object was available: try to create a new one
- p = create();
- if (p != null) {
- create = true;
- }
- }
- if (blockWhenExhausted) {
- if (p == null) {
- if (borrowMaxWaitMillis <0) {
- p = idleObjects.takeFirst();
- } else {
- p = idleObjects.pollFirst(borrowMaxWaitMillis,
- TimeUnit.MILLISECONDS);
- }
- }
- if (p == null) {
- throw new NoSuchElementException(
- "Timeout waiting for idle object");
- }
- } else {
- if (p == null) {
- throw new NoSuchElementException("Pool exhausted");
- }
- }
- if (!p.allocate()) {
- p = null;
- }
- if (p != null) {
- try {
- // Activate the borrowed object through the factory
- factory.activateObject(p);
- } catch (final Exception e) {
- try {
- destroy(p);
- } catch (final Exception e1) {
- // Ignore - activation failure is more important
- }
- p = null;
- if (create) {
- final NoSuchElementException nsee = new NoSuchElementException(
- "Unable to activate object");
- nsee.initCause(e);
- throw nsee;
- }
- }
- if (p != null && (getTestOnBorrow() || create && getTestOnCreate())) {
- boolean validate = false;
- Throwable validationThrowable = null;
- try {
- validate = factory.validateObject(p);
- } catch (final Throwable t) {
- PoolUtils.checkRethrow(t);
- validationThrowable = t;
- }
- if (!validate) {
- try {
- destroy(p);
- destroyedByBorrowValidationCount.incrementAndGet();
- } catch (final Exception e) {
- // Ignore - validation failure is more important
- }
- p = null;
- if (create) {
- final NoSuchElementException nsee = new NoSuchElementException(
- "Unable to validate object");
- nsee.initCause(validationThrowable);
- throw nsee;
- }
- }
- }
- }
- }
- updateStatsBorrow(p, System.currentTimeMillis() - waitTime);
- return p.getObject();
- }
- /**
- * Attempts to create a new wrapped pooled object.
- * <p>
- * If there are {@link #getMaxTotal()} objects already in circulation
- * or in process of being created, this method returns null.
- * <p>
- * A negative maxTotal is treated as {@code Integer.MAX_VALUE}. The capacity
- * check and the makeObjectCount bookkeeping both run while holding
- * makeObjectCountLock; the factory call itself runs outside that lock.
- *
- * @return The new wrapped pooled object
- *
- * @throws Exception if the object factory's {@code makeObject} fails
- */
- private PooledObject<T> create() throws Exception {
- int localMaxTotal = getMaxTotal();
- // This simplifies the code later in this method
- if (localMaxTotal <0) {
- localMaxTotal = Integer.MAX_VALUE;
- }
- // Flag that indicates if create should:
- // - TRUE: call the factory to create an object
- // - FALSE: return null
- // - null: loop and re-test the condition that determines whether to
- // call the factory
- Boolean create = null;
- while (create == null) {
- synchronized (makeObjectCountLock) {
- final long newCreateCount = createCount.incrementAndGet();
- if (newCreateCount> localMaxTotal) {
- // The pool is currently at capacity or in the process of
- // making enough new objects to take it to capacity.
- createCount.decrementAndGet();
- if (makeObjectCount == 0) {
- // There are no makeObject() calls in progress so the
- // pool is at capacity. Do not attempt to create a new
- // object. Return and wait for an object to be returned
- create = Boolean.FALSE;
- } else {
- // There are makeObject() calls in progress that might
- // bring the pool to capacity. Those calls might also
- // fail so wait until they complete and then re-test if
- // the pool is at capacity or not.
- makeObjectCountLock.wait();
- }
- } else {
- // The pool is not at capacity. Create a new object.
- makeObjectCount++;
- create = Boolean.TRUE;
- }
- }
- }
- if (!create.booleanValue()) {
- return null;
- }
- final PooledObject<T> p;
- try {
- // Call makeObject() on the configured factory
- p = factory.makeObject();
- } catch (final Exception e) {
- createCount.decrementAndGet(); // creation failed: undo the increment made in the capacity check above
- throw e;
- } finally {
- synchronized (makeObjectCountLock) {
- makeObjectCount--;
- makeObjectCountLock.notifyAll(); // wake any thread waiting in the capacity check above
- }
- }
- final AbandonedConfig ac = this.abandonedConfig;
- if (ac != null && ac.getLogAbandoned()) {
- p.setLogAbandoned(true);
- }
- createdCount.incrementAndGet();
- allObjects.put(new IdentityWrapper<T>(p.getObject()), p);
- return p;
- }
- // JedisFactory creates the new connection (to the master, when the factory belongs to a JedisSentinelPool)
- /*
- * redis.clients.jedis.JedisFactory#makeObject
- *
- * Builds a Jedis client for the factory's current host and port, connects it,
- * then applies the optional settings in order: AUTH when a password is set,
- * SELECT when the database is not 0, and CLIENT SETNAME when a client name is
- * set. If any of these steps throws a JedisException the client is closed and
- * the exception is rethrown. The connected client is returned wrapped in a
- * DefaultPooledObject.
- */
- @Override
- public PooledObject<Jedis> makeObject() throws Exception {
- final HostAndPort hostAndPort = this.hostAndPort.get();
- final Jedis jedis = new Jedis(hostAndPort.getHost(), hostAndPort.getPort(), connectionTimeout,
- soTimeout, ssl, sslSocketFactory, sslParameters, hostnameVerifier);
- try {
- jedis.connect();
- // If a password is configured, authenticate with "auth xxx"
- // (Redis configuration: requirepass xxx)
- if (null != this.password) {
- jedis.auth(this.password);
- }
- if (database != 0) {
- jedis.select(database);
- }
- if (clientName != null) {
- jedis.clientSetname(clientName);
- }
- } catch (JedisException je) {
- jedis.close();
- throw je;
- }
- return new DefaultPooledObject<Jedis>(jedis);
- }
- /*
- * redis.clients.jedis.JedisFactory#activateObject
- *
- * Called when a pooled connection is handed out: if the connection's current
- * database differs from the factory's configured database, selects the
- * configured one.
- */
- @Override
- public void activateObject(PooledObject<Jedis> pooledJedis) throws Exception {
- final BinaryJedis jedis = pooledJedis.getObject();
- if (jedis.getDB() != database) {
- jedis.select(database);
- }
- }
来源: https://www.cnblogs.com/yougewe/p/12444375.html