/*
 * Copyright (c) 2024 Yunshan Networks
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package monitor

import (
	"context"
	"sort"
	"time"

	mapset "github.com/deckarep/golang-set"

	"github.com/deepflowio/deepflow/server/controller/common"
	"github.com/deepflowio/deepflow/server/controller/config"
	"github.com/deepflowio/deepflow/server/controller/db/metadb"
	metadbmodel "github.com/deepflowio/deepflow/server/controller/db/metadb/model"
	"github.com/deepflowio/deepflow/server/controller/model"
	mconfig "github.com/deepflowio/deepflow/server/controller/monitor/config"
	"github.com/deepflowio/deepflow/server/controller/trisolaris/refresh"
)

// dbAndIP is the message carried by ControllerCheck.ch: one controller
// (re)allocation request, pairing an organization database with a controller IP.
type dbAndIP struct {
	// db is the organization database the allocation is executed against.
	db *metadb.DB
	// ip is forwarded to vtapControllerAlloc as the controller IP to exclude;
	// vtapControllerCheck sends the empty string when only VTaps without a
	// controller need to be handled.
	ip string
}

// ControllerCheck periodically health-checks controllers, repairs VTap
// controller assignments and removes stale az_controller_connection rows.
//
// Fields:
//   - cCtx, cCancel: context derived from the one given to NewControllerCheck;
//     Stop calls cCancel and the ticker loops started by Start exit once cCtx
//     is done.
//   - cfg: monitor configuration (intervals, exception time frame, channel
//     length).
//   - healthCheckPort: port probed when a master-type controller is reached
//     through its pod IP.
//   - healthCheckNodePort: port probed in every other case (controller IP).
//   - ch: buffered queue of allocation requests; written by
//     TriggerReallocController and read by a goroutine started in Start.
//   - normalControllerDict: keyed by controller IP; controllers whose stored
//     state is not "complete" but whose probe currently succeeds, waiting to be
//     promoted.
//   - exceptionControllerDict: keyed by controller IP; controllers whose stored
//     state is "complete" but whose probe currently fails, waiting to be
//     demoted.
type ControllerCheck struct {
	cCtx                    context.Context
	cCancel                 context.CancelFunc
	cfg                     mconfig.MonitorConfig
	healthCheckPort         int
	healthCheckNodePort     int
	ch                      chan dbAndIP
	normalControllerDict    map[string]*dfHostCheck
	exceptionControllerDict map[string]*dfHostCheck
}

// NewControllerCheck builds a ControllerCheck from the controller
// configuration.
//
// A cancellable child of ctx is stored on the result so that Stop can end the
// check independently of the parent context. The request channel is buffered
// with cfg.MonitorCfg.HealthCheckHandleChannelLen slots and both tracking maps
// start out empty.
func NewControllerCheck(cfg *config.ControllerConfig, ctx context.Context) *ControllerCheck {
	checkCtx, cancel := context.WithCancel(ctx)
	monitorCfg := cfg.MonitorCfg

	check := new(ControllerCheck)
	check.cCtx, check.cCancel = checkCtx, cancel
	check.cfg = monitorCfg
	check.healthCheckPort = cfg.ListenPort
	check.healthCheckNodePort = cfg.ListenNodePort
	check.ch = make(chan dbAndIP, monitorCfg.HealthCheckHandleChannelLen)
	check.normalControllerDict = map[string]*dfHostCheck{}
	check.exceptionControllerDict = map[string]*dfHostCheck{}
	return check
}

// Start launches the three background goroutines of the controller check and
// returns immediately:
//
//  1. every cfg.SyncDefaultORGDataInterval seconds, SyncDefaultOrgData is run;
//  2. every cfg.HealthCheckInterval seconds, healthCheck, vtapControllerCheck
//     and azConnectionCheck are run for each database visited by
//     metadb.DoOnAllDBs;
//  3. allocation requests queued on c.ch are consumed one at a time: the VTaps
//     are (re)assigned and the VTap cache of the affected organization is
//     refreshed.
//
// Every goroutine terminates when either sCtx or the check's own context
// (cancelled by Stop) is done. Requests still queued on c.ch at that moment are
// left in the channel.
func (c *ControllerCheck) Start(sCtx context.Context) {
	log.Info("controller check start")

	// Periodic synchronisation of the default organization's controller data.
	go func() {
		ticker := time.NewTicker(time.Duration(c.cfg.SyncDefaultORGDataInterval) * time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				c.SyncDefaultOrgData()
			case <-sCtx.Done():
				return
			case <-c.cCtx.Done():
				return
			}
		}
	}()

	// Periodic health check.
	go func() {
		ticker := time.NewTicker(time.Duration(c.cfg.HealthCheckInterval) * time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				if err := metadb.DoOnAllDBs(func(db *metadb.DB) error {
					// Controller health check.
					c.healthCheck(db)
					// Find VTaps without a controller and trigger allocation.
					c.vtapControllerCheck(db)
					// Check az_controller_connection, delete unused items.
					c.azConnectionCheck(db)
					return nil
				}); err != nil {
					log.Error(err)
				}
			case <-sCtx.Done():
				return
			case <-c.cCtx.Done():
				return
			}
		}
	}()

	// Allocate/reallocate controllers for the VTaps named by each request on
	// c.ch. The loop also watches both contexts so that the goroutine does not
	// outlive the check.
	go func() {
		for {
			select {
			case req := <-c.ch:
				c.vtapControllerAlloc(req.db, req.ip)
				refresh.RefreshCache(req.db.ORGID, []common.DataChanged{common.DATA_CHANGED_VTAP})
			case <-sCtx.Done():
				return
			case <-c.cCtx.Done():
				return
			}
		}
	}()
}

// Stop cancels the check's own context, if a cancel function is set, and logs
// that the controller check has stopped.
func (c *ControllerCheck) Stop() {
	if cancel := c.cCancel; cancel != nil {
		cancel()
	}
	log.Info("controller check stopped")
}

// checkExceptionControllers is keyed by controller IP and holds one entry per
// controller that healthCheck has seen in a non-"complete" state. healthCheck
// removes the entry when the controller is healthy again, and deletes the
// controller from the database once the entry's duration() exceeds
// cfg.ExceptionTimeFrame.
// NOTE(review): this is package-level state shared by every ControllerCheck and
// accessed without a lock — confirm healthCheck is never run concurrently.
var checkExceptionControllers = make(map[string]*dfHostCheck)

// healthCheck probes every controller that is not in maintenance state and
// updates its stored state, then deletes controllers that stayed in a
// non-"complete" state for longer than cfg.ExceptionTimeFrame.
//
// Controllers are read from and written to metadb.DefaultDB; orgDB is used for
// the reallocation request, for deleting az_controller_connection rows and for
// cleanExceptionControllerData.
//
// To ride out network jitter a state change is only persisted after the probe
// result has contradicted the stored state for at least
// 3 * common.HEALTH_CHECK_INTERVAL:
//
//   - stored complete, probe ok: drop the controller from all tracking maps.
//   - stored complete, probe failed: start tracking it in
//     exceptionControllerDict; once tracked long enough, stop tracking, store
//     state "exception", trigger reallocation of its VTaps and start the
//     deletion timer in checkExceptionControllers.
//   - stored not complete, probe ok: start tracking it in normalControllerDict;
//     once tracked long enough, stop tracking, store state "complete" and stop
//     the deletion timer.
//   - stored not complete, probe failed: drop it from normalControllerDict and
//     exceptionControllerDict.
//
// NOTE(review): dfHostCheck.duration() is assumed to return the seconds elapsed
// since newDFHostCheck() — confirm against its definition.
func (c *ControllerCheck) healthCheck(orgDB *metadb.DB) {
	var controllers []metadbmodel.Controller

	log.Info("controller health check start")

	if err := metadb.DefaultDB.Where("state != ?", common.HOST_STATE_MAINTENANCE).Order("state desc").Find(&controllers).Error; err != nil {
		log.Errorf("get controller from db error: %v", err)
		return
	}

	// Minimum time a contradicting probe result must persist before the stored
	// state is changed.
	debounce := int64(3 * common.HEALTH_CHECK_INTERVAL.Seconds())

	for _, controller := range controllers {
		ip := controller.IP

		// Use the pod IP and the in-cluster port for master-type controllers
		// that have a pod IP; otherwise probe the controller IP on the node port.
		probeIP, probePort := ip, c.healthCheckNodePort
		if controller.NodeType == common.CONTROLLER_NODE_TYPE_MASTER && len(controller.PodIP) != 0 {
			probeIP, probePort = controller.PodIP, c.healthCheckPort
		}
		active := isActive(common.HEALTH_CHECK_URL, probeIP, probePort)

		if controller.State == common.HOST_STATE_COMPLETE {
			if active {
				// Stored state and probe agree: nothing is pending.
				delete(c.normalControllerDict, ip)
				delete(c.exceptionControllerDict, ip)
				delete(checkExceptionControllers, ip)
				continue
			}

			pending, tracked := c.exceptionControllerDict[ip]
			if !tracked {
				// First failed probe: start the debounce timer.
				c.exceptionControllerDict[ip] = newDFHostCheck()
				continue
			}
			if pending.duration() < debounce {
				continue
			}

			delete(c.exceptionControllerDict, ip)
			if err := metadb.DefaultDB.Model(&controller).Update("state", common.HOST_STATE_EXCEPTION).Error; err != nil {
				log.Errorf("update controller(name: %s, ip: %s) state error: %v", controller.Name, controller.IP, err)
			}
			log.Infof("set controller (%s) state to exception", ip)
			// Reallocate the controller of the VTaps that used this IP.
			c.TriggerReallocController(orgDB, ip)
			if _, ok := checkExceptionControllers[ip]; !ok {
				checkExceptionControllers[ip] = newDFHostCheck()
			}
			continue
		}

		// Stored state is not "complete": make sure the deletion timer runs.
		if _, ok := checkExceptionControllers[ip]; !ok {
			checkExceptionControllers[ip] = newDFHostCheck()
		}

		if !active {
			// Stored state and probe agree: nothing is pending.
			delete(c.normalControllerDict, ip)
			delete(c.exceptionControllerDict, ip)
			continue
		}

		pending, tracked := c.normalControllerDict[ip]
		if !tracked {
			// First successful probe: start the debounce timer.
			c.normalControllerDict[ip] = newDFHostCheck()
			continue
		}
		if pending.duration() < debounce {
			continue
		}

		delete(c.normalControllerDict, ip)
		if err := metadb.DefaultDB.Model(&controller).Update("state", common.HOST_STATE_COMPLETE).Error; err != nil {
			log.Errorf("update controller(name: %s, ip: %s) state error: %v", controller.Name, controller.IP, err)
		}
		log.Infof("set controller (%s) state to normal", ip)
		delete(checkExceptionControllers, ip)
	}

	// Remove controllers whose exception outlasted the configured time frame.
	// Deleting from a map while ranging over it is permitted in Go.
	var controllerIPs []string
	for ip, dfhostCheck := range checkExceptionControllers {
		if dfhostCheck.duration() > int64(c.cfg.ExceptionTimeFrame) {
			if err := orgDB.Delete(metadbmodel.AZControllerConnection{}, "controller_ip = ?", ip).Error; err != nil {
				log.Errorf("delete az_controller_connection(ip: %s) error: %s", ip, err.Error(), orgDB.LogPrefixORGID)
			}
			err := metadb.DefaultDB.Delete(metadbmodel.Controller{}, "ip = ?", ip).Error
			if err != nil {
				log.Errorf("delete controller(%s) failed, err:%s", ip, err)
			} else {
				log.Infof("delete controller(%s), exception lasts for %d seconds", ip, dfhostCheck.duration())
				delete(checkExceptionControllers, ip)
			}
			// The IP is collected even when the controller row could not be
			// deleted, so its genesis data is cleaned in either case.
			controllerIPs = append(controllerIPs, ip)
		}
	}
	c.cleanExceptionControllerData(orgDB, controllerIPs)
	log.Info("controller health check end")
}

// TriggerReallocController queues a controller (re)allocation request for the
// given organization database; controllerIP is later passed to
// vtapControllerAlloc as the IP to exclude. The send blocks while the request
// channel is full.
func (c *ControllerCheck) TriggerReallocController(orgDB *metadb.DB, controllerIP string) {
	request := dbAndIP{db: orgDB, ip: controllerIP}
	c.ch <- request
}

// vtapControllerCheck validates the controller assignment of every VTap in
// orgDB, except those of type tunnel decapsulation.
//
// A VTap whose controller_ip is not among the IPs returned by
// getIPMap(common.HOST_TYPE_CONTROLLER) has its controller_ip reset to the
// empty string. A VTap that has a controller but still carries the
// "alloc controller failed" exception bit has that bit cleared. If at least one
// VTap ends up without a controller, a reallocation request is queued.
//
// Nothing is done when the controller IP map cannot be loaded or is empty.
func (c *ControllerCheck) vtapControllerCheck(orgDB *metadb.DB) {
	log.Info("vtap controller check start", orgDB.LogPrefixORGID)

	ipMap, err := getIPMap(common.HOST_TYPE_CONTROLLER)
	if err != nil {
		log.Error(err, orgDB.LogPrefixORGID)
		return
	}
	if len(ipMap) == 0 {
		log.Info("no controller in DB, do nothing", orgDB.LogPrefixORGID)
		return
	}

	var vtaps []metadbmodel.VTap
	if err := orgDB.Where("type != ?", common.VTAP_TYPE_TUNNEL_DECAPSULATION).Find(&vtaps).Error; err != nil {
		log.Error(err, orgDB.LogPrefixORGID)
		return
	}

	hasVTapWithoutController := false
	for _, vtap := range vtaps {
		// Check that vtap.controller_ip is a known controller IP; reset it to
		// empty if it is not.
		if _, ok := ipMap[vtap.ControllerIP]; !ok {
			log.Infof("controller ip(%s) in vtap(%s) is invalid", vtap.ControllerIP, vtap.Name, orgDB.LogPrefixORGID)
			vtap.ControllerIP = ""
			if err := orgDB.Model(&metadbmodel.VTap{}).Where("lcuuid = ?", vtap.Lcuuid).Update("controller_ip", "").Error; err != nil {
				log.Errorf("update vtap(lcuuid: %s, name: %s) controller ip to empty error: %v", vtap.Lcuuid, vtap.Name, err, orgDB.LogPrefixORGID)
			}
		}

		if vtap.ControllerIP == "" {
			hasVTapWithoutController = true
			continue
		}

		// The VTap has a controller; clear a leftover allocation-failed flag.
		if vtap.Exceptions&common.VTAP_EXCEPTION_ALLOC_CONTROLLER_FAILED != 0 {
			exceptions := vtap.Exceptions &^ common.VTAP_EXCEPTION_ALLOC_CONTROLLER_FAILED
			if err := orgDB.Model(&vtap).Update("exceptions", exceptions).Error; err != nil {
				log.Errorf("update vtap(name: %s) exceptions(%d) error: %v", vtap.Name, exceptions, err, orgDB.LogPrefixORGID)
			}
		}
	}

	// Trigger controller allocation when some VTap has no controller.
	if hasVTapWithoutController {
		c.TriggerReallocController(orgDB, "")
	}
	log.Info("vtap controller check end", orgDB.LogPrefixORGID)
}

// vtapControllerAlloc assigns a controller to every VTap in orgDB (tunnel
// decapsulation VTaps excluded) that has no controller or whose controller is
// excludeIP.
//
// Candidate controllers for a VTap are the "complete"-state controllers
// connected to the VTap's availability zone through az_controller_connection,
// where a connection with AZ "ALL" applies to every zone of its region that
// hosts a VTap awaiting allocation. Among the candidates the one with the most
// remaining capacity (VTapMax minus VTaps already assigned) is chosen. A VTap
// without any candidate gets the "alloc controller failed" exception bit set; a
// VTap that is assigned has that bit cleared.
//
// The function returns without allocating anything if one of the queries it
// depends on fails.
func (c *ControllerCheck) vtapControllerAlloc(orgDB *metadb.DB, excludeIP string) {
	var vtaps []metadbmodel.VTap
	var controllers []metadbmodel.Controller
	var azs []metadbmodel.AZ
	var azControllerConns []metadbmodel.AZControllerConnection

	log.Info("vtap controller alloc start", orgDB.LogPrefixORGID)

	if err := orgDB.Where("type != ?", common.VTAP_TYPE_TUNNEL_DECAPSULATION).Find(&vtaps).Error; err != nil {
		log.Error(err, orgDB.LogPrefixORGID)
		return
	}
	if err := orgDB.Where("state = ?", common.HOST_STATE_COMPLETE).Find(&controllers).Error; err != nil {
		log.Error(err, orgDB.LogPrefixORGID)
		return
	}

	// Group the VTaps awaiting allocation by availability zone and count how
	// many VTaps each controller already serves.
	azToNoControllerVTaps := make(map[string][]*metadbmodel.VTap)
	controllerIPToUsedVTapNum := make(map[string]int)
	azLcuuids := mapset.NewSet()
	for i := range vtaps {
		vtap := &vtaps[i]
		if vtap.ControllerIP != "" && vtap.ControllerIP != excludeIP {
			controllerIPToUsedVTapNum[vtap.ControllerIP] += 1
			continue
		}
		azToNoControllerVTaps[vtap.AZ] = append(azToNoControllerVTaps[vtap.AZ], vtap)
		azLcuuids.Add(vtap.AZ)
	}

	// Remaining capacity per controller; a missing usage entry counts as zero.
	controllerIPToAvailableVTapNum := make(map[string]int, len(controllers))
	for _, controller := range controllers {
		controllerIPToAvailableVTapNum[controller.IP] = controller.VTapMax - controllerIPToUsedVTapNum[controller.IP]
	}

	// Look up the region of each availability zone involved.
	if err := orgDB.Where("lcuuid IN (?)", azLcuuids.ToSlice()).Find(&azs).Error; err != nil {
		log.Error(err, orgDB.LogPrefixORGID)
		return
	}
	regionToAZLcuuids := make(map[string][]string)
	regionLcuuids := mapset.NewSet()
	for _, az := range azs {
		regionToAZLcuuids[az.Region] = append(regionToAZLcuuids[az.Region], az.Lcuuid)
		regionLcuuids.Add(az.Region)
	}

	// Collect the controller IPs connected to each availability zone. A failed
	// query must abort: continuing with an empty result would flag every
	// waiting VTap as "alloc controller failed".
	if err := orgDB.Where("region IN (?)", regionLcuuids.ToSlice()).Find(&azControllerConns).Error; err != nil {
		log.Error(err, orgDB.LogPrefixORGID)
		return
	}
	azToControllerIPs := make(map[string][]string)
	for _, conn := range azControllerConns {
		if conn.AZ != "ALL" {
			azToControllerIPs[conn.AZ] = append(azToControllerIPs[conn.AZ], conn.ControllerIP)
			continue
		}
		for _, azLcuuid := range regionToAZLcuuids[conn.Region] {
			azToControllerIPs[azLcuuid] = append(azToControllerIPs[azLcuuid], conn.ControllerIP)
		}
	}

	// Walk the VTaps awaiting allocation and assign a controller IP to each.
	for az, noControllerVtaps := range azToNoControllerVTaps {
		// Controllers that may serve this availability zone, with their
		// remaining capacity.
		controllerAvailableVTapNum := []common.KVPair{}
		for _, controllerIP := range azToControllerIPs[az] {
			if availableVTapNum, ok := controllerIPToAvailableVTapNum[controllerIP]; ok {
				controllerAvailableVTapNum = append(
					controllerAvailableVTapNum,
					common.KVPair{Key: controllerIP, Value: availableVTapNum},
				)
			}
		}

		for _, vtap := range noControllerVtaps {
			// Allocation failed: record the exception flag on the VTap.
			if len(controllerAvailableVTapNum) == 0 {
				log.Warningf("no available controller for vtap (%s)", vtap.Name, orgDB.LogPrefixORGID)
				exceptions := vtap.Exceptions | common.VTAP_EXCEPTION_ALLOC_CONTROLLER_FAILED
				if err := orgDB.Model(vtap).Update("exceptions", exceptions).Error; err != nil {
					log.Errorf("update vtap(name: %s) exceptions(%d) error: %v", vtap.Name, exceptions, err, orgDB.LogPrefixORGID)
				}
				continue
			}

			// Sort by remaining capacity, largest first, and always take the
			// first entry. When no controller has capacity left the values are
			// zero or negative and the least overloaded controller is still
			// chosen, i.e. exceeding the VTap limit is allowed.
			sort.Slice(controllerAvailableVTapNum, func(i, j int) bool {
				return controllerAvailableVTapNum[i].Value > controllerAvailableVTapNum[j].Value
			})
			chosen := &controllerAvailableVTapNum[0]
			chosen.Value -= 1
			controllerIPToAvailableVTapNum[chosen.Key] -= 1

			// Allocation succeeded: store the controller IP and clear the
			// allocation-failed flag if it is set.
			log.Infof("alloc controller (%s) for vtap (%s)", chosen.Key, vtap.Name, orgDB.LogPrefixORGID)
			if err := orgDB.Model(vtap).Update("controller_ip", chosen.Key).Error; err != nil {
				log.Error(err, orgDB.LogPrefixORGID)
			}
			if vtap.Exceptions&common.VTAP_EXCEPTION_ALLOC_CONTROLLER_FAILED != 0 {
				exceptions := vtap.Exceptions &^ common.VTAP_EXCEPTION_ALLOC_CONTROLLER_FAILED
				if err := orgDB.Model(vtap).Update("exceptions", exceptions).Error; err != nil {
					log.Error(err, orgDB.LogPrefixORGID)
				}
			}
		}
	}
	log.Info("vtap controller alloc end", orgDB.LogPrefixORGID)
}

// azConnectionCheck deletes every az_controller_connection row in orgDB whose
// AZ is neither "ALL" nor the lcuuid of an existing availability zone.
//
// The check is abandoned when either the availability zones or the connections
// cannot be loaded.
func (c *ControllerCheck) azConnectionCheck(orgDB *metadb.DB) {
	var azs []metadbmodel.AZ
	var azControllerConns []metadbmodel.AZControllerConnection

	log.Info("az connection check start", orgDB.LogPrefixORGID)

	if err := orgDB.Find(&azs).Error; err != nil {
		log.Error(err, orgDB.LogPrefixORGID)
		return
	}
	existingAZs := make(map[string]struct{}, len(azs))
	for _, az := range azs {
		existingAZs[az.Lcuuid] = struct{}{}
	}

	if err := orgDB.Find(&azControllerConns).Error; err != nil {
		log.Error(err, orgDB.LogPrefixORGID)
		return
	}
	for _, conn := range azControllerConns {
		if conn.AZ == "ALL" {
			continue
		}
		if _, ok := existingAZs[conn.AZ]; ok {
			continue
		}
		// The availability zone no longer exists, so there is no name to
		// report; the connection is identified by lcuuid and region only.
		if err := orgDB.Delete(&conn).Error; err != nil {
			log.Errorf("fail to delete controller (ip: %s) az (lcuuid: %s, region: %s) connection, err: %s",
				conn.ControllerIP, conn.AZ, conn.Region, err.Error(), orgDB.LogPrefixORGID)
			continue
		}
		log.Infof("delete controller (ip: %s) az (lcuuid: %s, region: %s) connection",
			conn.ControllerIP, conn.AZ, conn.Region, orgDB.LogPrefixORGID)
	}
	log.Info("az connection check end", orgDB.LogPrefixORGID)
}

// cleanExceptionControllerData removes from orgDB the genesis vinterface rows
// whose node_ip is one of controllerIPs and logs the outcome. It does nothing
// when controllerIPs is empty.
func (c *ControllerCheck) cleanExceptionControllerData(orgDB *metadb.DB, controllerIPs []string) {
	if len(controllerIPs) == 0 {
		return
	}

	// Delete the genesis vinterfaces reported by the invalid controllers.
	result := orgDB.Where("node_ip IN ?", controllerIPs).Delete(&model.GenesisVinterface{})
	if result.Error != nil {
		log.Errorf("clean controllers (%s) genesis vinterface failed: %s", controllerIPs, result.Error, orgDB.LogPrefixORGID)
		return
	}
	log.Infof("clean controllers (%s) genesis vinterface success", controllerIPs, orgDB.LogPrefixORGID)
}

// SyncControllerExcludeField is the list of controller column names that
// SyncDefaultOrgData passes to metadb.SyncDefaultOrgData as its second
// argument.
// NOTE(review): presumably these columns are left untouched by the sync —
// confirm against metadb.SyncDefaultOrgData.
var SyncControllerExcludeField = []string{"nat_ip", "state"}

// SyncDefaultOrgData loads every controller from metadb.DefaultDB and hands the
// list, together with SyncControllerExcludeField, to metadb.SyncDefaultOrgData.
// Errors are logged, not returned.
//
// When the controllers cannot be loaded the sync is skipped, so that
// metadb.SyncDefaultOrgData is never driven by an empty list that only
// reflects a failed query.
func (c *ControllerCheck) SyncDefaultOrgData() {
	var controllers []metadbmodel.Controller
	if err := metadb.DefaultDB.Find(&controllers).Error; err != nil {
		log.Error(err)
		return
	}
	if err := metadb.SyncDefaultOrgData(controllers, SyncControllerExcludeField); err != nil {
		log.Error(err)
	}
}
