Files
PandaX/apps/devops/services/k8s/node/node.go
2022-01-22 17:07:04 +08:00

273 lines
8.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package node
import (
"context"
"errors"
"fmt"
"pandax/base/global"
"github.com/gin-gonic/gin"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"pandax/apps/devops/entity/k8s"
"pandax/apps/devops/services/k8s/dataselect"
"pandax/apps/devops/services/k8s/evict"
"pandax/apps/devops/services/k8s/parser"
"time"
)
// NodeList contains the list of nodes in the cluster together with list-level metadata.
type NodeList struct {
// ListMeta is the list metadata; toNodeList sets its TotalItems to the item count reported by the filter step.
ListMeta k8s.ListMeta `json:"listMeta"`
// Nodes holds one presentation-layer Node per selected cluster node.
Nodes []Node `json:"nodes"`
}
// Node is a presentation layer view of Kubernetes nodes. This means it is node plus additional
// augmented data we can get from other sources.
type Node struct {
// ObjectMeta is derived from the node's ObjectMeta via k8s.NewObjectMeta (see toNode).
ObjectMeta k8s.ObjectMeta `json:"objectMeta"`
// TypeMeta is built by toNode from the role string it is given, used as the resource kind.
TypeMeta k8s.TypeMeta `json:"typeMeta"`
// Ready is the status of the node's NodeReady condition; ConditionUnknown when the condition is absent.
Ready v1.ConditionStatus `json:"ready"`
// Unschedulable mirrors node.Spec.Unschedulable.
Unschedulable k8s.Unschedulable `json:"unschedulable"`
// NodeIP is the first InternalIP address reported in the node status, or empty when there is none.
NodeIP k8s.NodeIP `json:"nodeIP"`
// AllocatedResources is the value returned by getNodeAllocatedResources for the node and its pods.
AllocatedResources k8s.NodeAllocatedResources `json:"allocatedResources"`
// NodeInfo is copied from node.Status.NodeInfo.
NodeInfo v1.NodeSystemInfo `json:"nodeInfo"`
// RuntimeType is currently disabled (kept commented out).
//RuntimeType string `json:"runtimeType"`
}
// GetNodeList lists every node in the cluster and converts the result into a
// NodeList, applying the filter parameters carried by dsQuery.
//
// It returns a nil list and a wrapped error when the node list request fails.
// The underlying error is wrapped with %w so callers can still inspect it with
// errors.Is / errors.As.
func GetNodeList(client *kubernetes.Clientset, dsQuery *gin.Context) (*NodeList, error) {
	nodes, err := client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		return nil, fmt.Errorf("get nodes from cluster failed: %w", err)
	}
	return toNodeList(client, nodes.Items, dsQuery), nil
}
// toNodeList converts raw Kubernetes nodes into a NodeList.
//
// The nodes are first passed through the data-select filter built from the
// request parameters; ListMeta.TotalItems is the count reported by that filter.
// For every remaining node the pods on it are looked up and handed to toNode.
// A failed pod lookup is logged and the node is still included.
func toNodeList(client *kubernetes.Clientset, nodes []v1.Node, dsQuery *gin.Context) *NodeList {
	// Parse the parameters sent by the front end, e.g.
	// filterBy=name,1.1&itemsPerPage=10&name=&namespace=default&page=1&sortBy=d,creationTimestamp
	// (per the original note: sortBy=d is descending, sortBy=a is ascending).
	query := parser.ParseDataSelectPathParameter(dsQuery)

	// Apply the filter; matched is the number of items after filtering.
	cells, matched := dataselect.GenericDataSelectWithFilter(toCells(nodes), query)
	selected := fromCells(cells)

	result := &NodeList{
		ListMeta: k8s.ListMeta{TotalItems: matched},
		Nodes:    make([]Node, 0),
	}

	for i := range selected {
		current := selected[i]
		// Fetch the pods on this node. The original note says getNodePods
		// leaves out Succeeded/Failed pods — not visible here, confirm in getNodePods.
		podList, err := getNodePods(client, current)
		if err != nil {
			global.Log.Error(fmt.Sprintf("Couldn't get pods of %s node: %s\n", current.Name, err))
		}
		// toNode computes the node's resource figures from the node and its pods.
		result.Nodes = append(result.Nodes, toNode(current, podList, getNodeRole(current)))
	}
	return result
}
// toNode builds the presentation-layer Node for a Kubernetes node.
//
// pods is the pod list used to compute allocated resources and role is used as
// the resource kind of the returned TypeMeta. If the allocated resources cannot
// be computed the error is logged and whatever getNodeAllocatedResources
// returned is still stored in the result.
func toNode(node v1.Node, pods *v1.PodList, role string) Node {
	// CPU and memory requests/limits in use on the node.
	allocated, allocErr := getNodeAllocatedResources(node, pods)
	if allocErr != nil {
		global.Log.Error(fmt.Sprintf("Couldn't get allocated resources of %s node: %s\n", node.Name, allocErr))
	}

	var view Node
	view.ObjectMeta = k8s.NewObjectMeta(node.ObjectMeta)
	view.TypeMeta = k8s.NewTypeMeta(k8s.ResourceKind(role))
	view.Ready = getNodeConditionStatus(node, v1.NodeReady)
	view.NodeIP = k8s.NodeIP(getNodeIP(node))
	view.Unschedulable = k8s.Unschedulable(node.Spec.Unschedulable)
	view.AllocatedResources = allocated
	view.NodeInfo = node.Status.NodeInfo
	return view
}
// getNodeConditionStatus returns the status of the first condition of the
// given type found in the node's status, or v1.ConditionUnknown when the node
// reports no such condition.
func getNodeConditionStatus(node v1.Node, conditionType v1.NodeConditionType) v1.ConditionStatus {
	conditions := node.Status.Conditions
	for i := range conditions {
		if conditions[i].Type != conditionType {
			continue
		}
		return conditions[i].Status
	}
	return v1.ConditionUnknown
}
// getNodeIP returns the first address of type InternalIP listed in the node's
// status, or an empty string when the node reports none.
func getNodeIP(node v1.Node) string {
	addresses := node.Status.Addresses
	for i := range addresses {
		if addresses[i].Type != v1.NodeInternalIP {
			continue
		}
		return addresses[i].Address
	}
	return ""
}
// getNodeRole classifies a node as "Master" or "Worker" from its role labels.
//
// A node counts as "Master" when it carries either the legacy
// "node-role.kubernetes.io/master" label or the newer
// "node-role.kubernetes.io/control-plane" label. Checking only the legacy label
// reports control-plane nodes as workers on clusters that no longer set it.
// Only the presence of the label key matters; its value is ignored.
func getNodeRole(node v1.Node) string {
	controlPlaneLabels := []string{
		"node-role.kubernetes.io/master",
		"node-role.kubernetes.io/control-plane",
	}
	for _, label := range controlPlaneLabels {
		if _, ok := node.ObjectMeta.Labels[label]; ok {
			return "Master"
		}
	}
	return "Worker"
}
// GetNodeResource counts the namespaces, deployments and pods in the cluster.
//
// Deployments and pods are listed namespace by namespace and summed. Errors are
// logged rather than returned: if the namespace list fails all three counts are
// zero, and if a per-namespace list fails that namespace simply contributes
// nothing to the affected count.
//
// Every list result is only read after its error has been checked, so a failed
// call can never be dereferenced.
func GetNodeResource(client *kubernetes.Clientset) (namespaces int, deployments int, pods int) {
	nsList, err := client.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		global.Log.Error(fmt.Sprintf("list namespace err: %v", err))
		return 0, 0, 0
	}
	namespaces = len(nsList.Items)

	for i := range nsList.Items {
		nsName := nsList.Items[i].Name

		deployList, deployErr := client.AppsV1().Deployments(nsName).List(context.TODO(), metav1.ListOptions{})
		if deployErr != nil {
			global.Log.Error(fmt.Sprintf("get deployment err: %v", deployErr))
		} else {
			deployments += len(deployList.Items)
		}

		podList, podErr := client.CoreV1().Pods(nsName).List(context.TODO(), metav1.ListOptions{})
		if podErr != nil {
			global.Log.Error(fmt.Sprintf("get pod err: %v", podErr))
		} else {
			pods += len(podList.Items)
		}
	}
	return namespaces, deployments, pods
}
// NodeUnschdulable sets whether the named node is schedulable.
//
// It fetches the node, sets Spec.Unschedulable to unschdulable and writes the
// node back with an Update call. It returns (true, nil) on success; when the
// Get or the Update fails the error is logged and returned with false.
func NodeUnschdulable(client *kubernetes.Clientset, nodeName string, unschdulable bool) (bool, error) {
	global.Log.Info(fmt.Sprintf("设置Node节点:%v 不可调度: %v", nodeName, unschdulable))

	nodeAPI := client.CoreV1().Nodes()

	target, getErr := nodeAPI.Get(context.TODO(), nodeName, metav1.GetOptions{})
	if getErr != nil {
		global.Log.Error(fmt.Sprintf("get node err: %v", getErr.Error()))
		return false, getErr
	}

	target.Spec.Unschedulable = unschdulable
	if _, updateErr := nodeAPI.Update(context.TODO(), target, metav1.UpdateOptions{}); updateErr != nil {
		global.Log.Error(fmt.Sprintf("设置节点调度失败:%v", updateErr.Error()))
		return false, updateErr
	}
	return true, nil
}
// CordonNode drains a node: it first marks the node unschedulable and then
// evicts the pods on it through evict.EvictsNodePods.
//
// Once marked unschedulable, later deployments will not place pods on the node.
// The original note states that DaemonSet-managed pods are not evicted — that
// depends on evict.EvictsNodePods, which is not visible here. Roughly:
//
//	kubectl drain cn-beijing.i-2ze19qyi8votgjz12345 --grace-period=120 --ignore-daemonsets=true
//
// It returns (true, nil) on success and (false, err) when either step fails;
// an eviction failure is also logged.
func CordonNode(client *kubernetes.Clientset, nodeName string) (bool, error) {
	if _, err := NodeUnschdulable(client, nodeName, true); err != nil {
		return false, err
	}

	if err := evict.EvictsNodePods(client, nodeName); err != nil {
		global.Log.Error(fmt.Sprintf("排空节点出现异常: %v", err.Error()))
		return false, err
	}

	return true, nil
}
// RemoveNode removes a node from the cluster in three steps: mark it
// unschedulable, evict its pods, then delete the Node object.
//
// The log messages speak of an asynchronous task, but this function itself
// runs the steps sequentially and returns only when they finish or one fails.
// It returns (true, nil) on success and (false, err) at the first failing
// step; the elapsed time is logged on success.
func RemoveNode(client *kubernetes.Clientset, nodeName string) (bool, error) {
	begin := time.Now()
	global.Log.Info(fmt.Sprintf("移除Node节点:%v, 异步任务已开始", nodeName))

	if _, err := NodeUnschdulable(client, nodeName, true); err != nil {
		return false, err
	}

	if err := evict.EvictsNodePods(client, nodeName); err != nil {
		global.Log.Error(fmt.Sprintf("排空节点出现异常: %v", err.Error()))
		return false, err
	}

	if err := client.CoreV1().Nodes().Delete(context.TODO(), nodeName, metav1.DeleteOptions{}); err != nil {
		return false, err
	}

	global.Log.Info(fmt.Sprintf("已将节点:%v从集群中移除, 异步任务已完成,任务耗时:%v", nodeName, time.Since(begin)))
	return true, nil
}
// CollectionNodeUnschedule marks every node in nodeName as unschedulable.
//
// Example request payload: {"node_name": ["k8s-master", "k8s-node"]}
//
// It returns an error when nodeName is empty, or the first Get/Update error
// encountered; processing stops at that node, so earlier nodes stay updated
// and later ones are untouched.
//
// The update branch previously returned the (nil) error of the preceding Get,
// which made a failed update look like success; it now returns the update error.
func CollectionNodeUnschedule(client *kubernetes.Clientset, nodeName []string) error {
	if len(nodeName) == 0 {
		return errors.New("节点名称不能为空")
	}
	global.Log.Info(fmt.Sprintf("批量设置Node节点:%v 不可调度true", nodeName))

	nodeAPI := client.CoreV1().Nodes()
	for _, name := range nodeName {
		node, err := nodeAPI.Get(context.TODO(), name, metav1.GetOptions{})
		if err != nil {
			global.Log.Error(fmt.Sprintf("get node err: %v", err.Error()))
			return err
		}

		node.Spec.Unschedulable = true
		if _, err := nodeAPI.Update(context.TODO(), node, metav1.UpdateOptions{}); err != nil {
			global.Log.Error(fmt.Sprintf("设置节点调度失败:%v", err.Error()))
			return err
		}
	}

	global.Log.Info(fmt.Sprintf("已将所有Node节点:%v 设置为不可调度", nodeName))
	return nil
}
// CollectionCordonNode drains every node in nodeName: each node is marked
// unschedulable and its pods are evicted, via CordonNode.
//
// Example request payload: {"node_name": ["k8s-master", "k8s-node"]}
//
// It returns an error when nodeName is empty, or the first error returned by
// CordonNode; processing stops at that node.
//
// CordonNode already performs the Get + Update that marks the node
// unschedulable (and logs any failure), so that step is not repeated here. The
// removed duplicate also returned the wrong error variable on update failure,
// which reported a failed update as success.
func CollectionCordonNode(client *kubernetes.Clientset, nodeName []string) error {
	if len(nodeName) == 0 {
		return errors.New("节点名称不能为空")
	}
	global.Log.Info(fmt.Sprintf("开始排空节点, 设置Node节点:%v 不可调度true", nodeName))

	for _, name := range nodeName {
		if _, err := CordonNode(client, name); err != nil {
			return err
		}
	}

	global.Log.Info(fmt.Sprintf("已将所有Node节点:%v 设置为不可调度", nodeName))
	return nil
}