mirror of
https://gitee.com/XM-GO/PandaX.git
synced 2026-04-23 19:08:35 +08:00
273 lines
8.8 KiB
Go
package node
|
||
|
||
import (
|
||
"context"
|
||
"errors"
|
||
"fmt"
|
||
"pandax/base/global"
|
||
|
||
"github.com/gin-gonic/gin"
|
||
v1 "k8s.io/api/core/v1"
|
||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||
"k8s.io/client-go/kubernetes"
|
||
"pandax/apps/devops/entity/k8s"
|
||
"pandax/apps/devops/services/k8s/dataselect"
|
||
"pandax/apps/devops/services/k8s/evict"
|
||
"pandax/apps/devops/services/k8s/parser"
|
||
"time"
|
||
)
|
||
|
||
// NodeList contains the list of nodes in the cluster, in presentation form.
type NodeList struct {
	// ListMeta carries list metadata; TotalItems holds the node count
	// after filtering (see toNodeList).
	ListMeta k8s.ListMeta `json:"listMeta"`
	// Nodes is the filtered, sorted and paged set of node views.
	Nodes []Node `json:"nodes"`
}
|
||
|
||
// Node is a presentation layer view of Kubernetes nodes. This means it is node plus additional
// augmented data we can get from other sources.
type Node struct {
	ObjectMeta k8s.ObjectMeta `json:"objectMeta"`
	TypeMeta   k8s.TypeMeta   `json:"typeMeta"`
	// Ready mirrors the node's NodeReady condition status
	// (True/False/Unknown — see getNodeConditionStatus).
	Ready v1.ConditionStatus `json:"ready"`
	// Unschedulable mirrors node.Spec.Unschedulable (a cordoned node).
	Unschedulable k8s.Unschedulable `json:"unschedulable"`
	// NodeIP is the node's InternalIP address (empty when none is listed).
	NodeIP k8s.NodeIP `json:"nodeIP"`
	// AllocatedResources holds the CPU/memory requests and limits computed
	// from the pods scheduled on this node.
	AllocatedResources k8s.NodeAllocatedResources `json:"allocatedResources"`
	NodeInfo           v1.NodeSystemInfo          `json:"nodeInfo"`
	//RuntimeType string `json:"runtimeType"`
}
|
||
|
||
func GetNodeList(client *kubernetes.Clientset, dsQuery *gin.Context) (*NodeList, error) {
|
||
/*
|
||
获取所有Node节点信息
|
||
*/
|
||
|
||
nodes, err := client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
|
||
if err != nil {
|
||
return nil, fmt.Errorf("get nodes from cluster failed: %v", err)
|
||
}
|
||
|
||
return toNodeList(client, nodes.Items, dsQuery), nil
|
||
}
|
||
|
||
func toNodeList(client *kubernetes.Clientset, nodes []v1.Node, dsQuery *gin.Context) *NodeList {
|
||
|
||
nodeList := &NodeList{
|
||
Nodes: make([]Node, 0), // make初始化node信息
|
||
ListMeta: k8s.ListMeta{TotalItems: len(nodes)}, // 计算node数量
|
||
}
|
||
// 解析前端传递的参数, filterBy=name,1.1&itemsPerPage=10&name=&namespace=default&page=1&sortBy=d,creationTimestamp
|
||
// sortBy=d 倒序, sortBy=a 正序, 排序按照a-z
|
||
dataSelect := parser.ParseDataSelectPathParameter(dsQuery)
|
||
// 过滤
|
||
nodeCells, filteredTotal := dataselect.GenericDataSelectWithFilter(toCells(nodes), dataSelect)
|
||
nodes = fromCells(nodeCells)
|
||
// 更新node数量, filteredTotal过滤后的数量
|
||
nodeList.ListMeta = k8s.ListMeta{TotalItems: filteredTotal}
|
||
|
||
for _, node := range nodes {
|
||
// 根据Node名称去获取节点上面的pod,过滤时排除pod为 Succeeded, Failed 返回pods
|
||
pods, err := getNodePods(client, node)
|
||
if err != nil {
|
||
global.Log.Error(fmt.Sprintf("Couldn't get pods of %s node: %s\n", node.Name, err))
|
||
}
|
||
|
||
// 调用toNode方法获取 node节点的计算资源
|
||
nodeList.Nodes = append(nodeList.Nodes, toNode(node, pods, getNodeRole(node)))
|
||
}
|
||
|
||
return nodeList
|
||
}
|
||
|
||
func toNode(node v1.Node, pods *v1.PodList, role string) Node {
|
||
// 获取cpu和内存的reqs, limits使用
|
||
allocatedResources, err := getNodeAllocatedResources(node, pods)
|
||
if err != nil {
|
||
global.Log.Error(fmt.Sprintf("Couldn't get allocated resources of %s node: %s\n", node.Name, err))
|
||
}
|
||
|
||
return Node{
|
||
ObjectMeta: k8s.NewObjectMeta(node.ObjectMeta),
|
||
TypeMeta: k8s.NewTypeMeta(k8s.ResourceKind(role)),
|
||
Ready: getNodeConditionStatus(node, v1.NodeReady),
|
||
NodeIP: k8s.NodeIP(getNodeIP(node)),
|
||
Unschedulable: k8s.Unschedulable(node.Spec.Unschedulable),
|
||
AllocatedResources: allocatedResources,
|
||
NodeInfo: node.Status.NodeInfo,
|
||
}
|
||
}
|
||
|
||
func getNodeConditionStatus(node v1.Node, conditionType v1.NodeConditionType) v1.ConditionStatus {
|
||
for _, condition := range node.Status.Conditions {
|
||
if condition.Type == conditionType {
|
||
return condition.Status
|
||
}
|
||
}
|
||
return v1.ConditionUnknown
|
||
}
|
||
|
||
func getNodeIP(node v1.Node) string {
|
||
for _, addr := range node.Status.Addresses {
|
||
if addr.Type == v1.NodeInternalIP {
|
||
return addr.Address
|
||
}
|
||
}
|
||
return ""
|
||
}
|
||
|
||
func getNodeRole(node v1.Node) string {
|
||
var role string
|
||
if _, ok := node.ObjectMeta.Labels["node-role.kubernetes.io/master"]; ok {
|
||
role = "Master"
|
||
} else {
|
||
role = "Worker"
|
||
}
|
||
return role
|
||
}
|
||
|
||
func GetNodeResource(client *kubernetes.Clientset) (namespaces int, deployments int, pods int) {
|
||
/*
|
||
获取集群 namespace数量 deployment数量 pod数量 container数量
|
||
*/
|
||
namespace, err := client.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
|
||
namespaces = len(namespace.Items)
|
||
if err != nil {
|
||
global.Log.Error("list namespace err")
|
||
}
|
||
for _, v := range namespace.Items {
|
||
deployment, err := client.AppsV1().Deployments(v.Name).List(context.TODO(), metav1.ListOptions{})
|
||
deployments += len(deployment.Items)
|
||
if err != nil {
|
||
global.Log.Error("get deployment err")
|
||
}
|
||
pod, err := client.CoreV1().Pods(v.Name).List(context.TODO(), metav1.ListOptions{})
|
||
if err != nil {
|
||
global.Log.Error("get pod err")
|
||
}
|
||
pods += len(pod.Items)
|
||
}
|
||
|
||
return namespaces, deployments, pods
|
||
}
|
||
|
||
func NodeUnschdulable(client *kubernetes.Clientset, nodeName string, unschdulable bool) (bool, error) {
|
||
/*
|
||
设置节点是否可调度
|
||
*/
|
||
global.Log.Info(fmt.Sprintf("设置Node节点:%v 不可调度: %v", nodeName, unschdulable))
|
||
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
|
||
if err != nil {
|
||
global.Log.Error(fmt.Sprintf("get node err: %v", err.Error()))
|
||
return false, err
|
||
}
|
||
node.Spec.Unschedulable = unschdulable
|
||
|
||
_, err2 := client.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
|
||
|
||
if err2 != nil {
|
||
global.Log.Error(fmt.Sprintf("设置节点调度失败:%v", err2.Error()))
|
||
return false, err2
|
||
}
|
||
return true, nil
|
||
}
|
||
|
||
func CordonNode(client *kubernetes.Clientset, nodeName string) (bool, error) {
|
||
/*
|
||
排空节点
|
||
选择排空节点(同时设置为不可调度),在后续进行应用部署时,则Pod不会再调度到该节点,并且该节点上由DaemonSet控制的Pod不会被排空。
|
||
kubectl drain cn-beijing.i-2ze19qyi8votgjz12345 --grace-period=120 --ignore-daemonsets=true
|
||
*/
|
||
_, err := NodeUnschdulable(client, nodeName, true)
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
err = evict.EvictsNodePods(client, nodeName)
|
||
if err != nil {
|
||
global.Log.Error(fmt.Sprintf("排空节点出现异常: %v", err.Error()))
|
||
return false, err
|
||
}
|
||
return true, nil
|
||
|
||
}
|
||
|
||
func RemoveNode(client *kubernetes.Clientset, nodeName string) (bool, error) {
|
||
startTime := time.Now()
|
||
global.Log.Info(fmt.Sprintf("移除Node节点:%v, 异步任务已开始", nodeName))
|
||
_, err := NodeUnschdulable(client, nodeName, true)
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
err = evict.EvictsNodePods(client, nodeName)
|
||
if err != nil {
|
||
global.Log.Error(fmt.Sprintf("排空节点出现异常: %v", err.Error()))
|
||
return false, err
|
||
}
|
||
err2 := client.CoreV1().Nodes().Delete(context.TODO(), nodeName, metav1.DeleteOptions{})
|
||
if err2 != nil {
|
||
return false, err2
|
||
}
|
||
global.Log.Info(fmt.Sprintf("已将节点:%v从集群中移除, 异步任务已完成,任务耗时:%v", nodeName, time.Since(startTime)))
|
||
|
||
return true, nil
|
||
}
|
||
|
||
func CollectionNodeUnschedule(client *kubernetes.Clientset, nodeName []string) error {
|
||
/*
|
||
批量设置Node节点不可调度
|
||
{"node_name": ["k8s-master", "k8s-node"]}
|
||
*/
|
||
if len(nodeName) <= 0 {
|
||
return errors.New("节点名称不能为空")
|
||
}
|
||
global.Log.Info(fmt.Sprintf("批量设置Node节点:%v 不可调度:true", nodeName))
|
||
for _, v := range nodeName {
|
||
node, err := client.CoreV1().Nodes().Get(context.TODO(), v, metav1.GetOptions{})
|
||
if err != nil {
|
||
global.Log.Error(fmt.Sprintf("get node err: %v", err.Error()))
|
||
return err
|
||
}
|
||
node.Spec.Unschedulable = true
|
||
|
||
_, err2 := client.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
|
||
|
||
if err2 != nil {
|
||
global.Log.Error(fmt.Sprintf("设置节点调度失败:%v", err2.Error()))
|
||
return err
|
||
}
|
||
}
|
||
global.Log.Info(fmt.Sprintf("已将所有Node节点:%v 设置为不可调度", nodeName))
|
||
return nil
|
||
}
|
||
|
||
func CollectionCordonNode(client *kubernetes.Clientset, nodeName []string) error {
|
||
/*
|
||
批量排空Node节点, 不允许调度
|
||
{"node_name": ["k8s-master", "k8s-node"]}
|
||
*/
|
||
|
||
if len(nodeName) <= 0 {
|
||
return errors.New("节点名称不能为空")
|
||
}
|
||
global.Log.Info(fmt.Sprintf("开始排空节点, 设置Node节点:%v 不可调度:true", nodeName))
|
||
for _, v := range nodeName {
|
||
node, err := client.CoreV1().Nodes().Get(context.TODO(), v, metav1.GetOptions{})
|
||
if err != nil {
|
||
global.Log.Error(fmt.Sprintf("get node err: %v", err.Error()))
|
||
return err
|
||
}
|
||
node.Spec.Unschedulable = true
|
||
|
||
_, err2 := client.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
|
||
|
||
if err2 != nil {
|
||
global.Log.Error(fmt.Sprintf("设置节点调度失败:%v", err2.Error()))
|
||
return err
|
||
}
|
||
_, cordonErr := CordonNode(client, v)
|
||
if cordonErr != nil {
|
||
return cordonErr
|
||
}
|
||
}
|
||
global.Log.Info(fmt.Sprintf("已将所有Node节点:%v 设置为不可调度", nodeName))
|
||
return nil
|
||
}
|