GPU

快速查看显卡使用情况和占用用户

Quickly check GPU usage and which users are occupying each GPU

ZingLix November 17, 2021

使用方法: python gpu.py

需要的依赖: xmltodict

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import subprocess
import xmltodict, pwd, json

# Positions within the whitespace-split "Uid:" line of /proc/<pid>/status;
# index 0 is the "Uid:" label itself, so the ids start at index 1.
UID, EUID = 1, 2


def owner(pid):
    """Return the username that owns process ``pid``.

    Scans ``/proc/<pid>/status`` for the ``Uid:`` line, takes the field at
    index ``UID`` of that line and resolves it through the password database.

    Args:
        pid: Process id (anything that formats into the ``/proc`` path).

    Returns:
        The account name for the uid; the uid itself as a string when the
        password database has no entry for it; ``None`` when the status file
        contains no ``Uid:`` line.

    Raises:
        OSError: If the status file cannot be opened (for example the
            process has already exited).
    """
    # The context manager closes the file on every exit path, including the
    # early return from inside the loop.
    with open("/proc/{}/status".format(pid)) as status:
        for ln in status:
            if ln.startswith("Uid:"):
                uid = int(ln.split()[UID])
                try:
                    return pwd.getpwuid(uid).pw_name
                except KeyError:
                    # No passwd entry for this uid: show the number instead
                    # of aborting the whole report.
                    return str(uid)
    return None


def add_user(process):
    """Tag every process record with the name of the user that owns it.

    Each record in ``process`` is updated in place with a ``"user"`` key
    obtained from ``owner(record["pid"])``; the records are returned in a
    new list, in their original order.
    """
    records = list(process)
    for record in records:
        record.update(user=owner(record["pid"]))
    return records


def simplify(gpu):
    """Return a new dict holding only the fields of ``gpu`` used for display.

    Keys keep the order they have in ``gpu``; the input is not modified.
    """
    wanted = (
        "@id",
        "product_name",
        "fan_speed",
        "fb_memory_usage",
        "utilization",
        "temperature",
        "processes",
    )
    return {
        field: value for field, value in gpu.items() if field in wanted
    }


def get_gpu_info():
    """Run ``nvidia-smi -q -x`` and return its report as plain dicts/lists.

    The XML output is parsed with ``xmltodict`` and the ``nvidia_smi_log``
    element is returned.  Its ``"gpu"`` entry is always a list; each GPU is
    reduced by ``simplify`` and its ``["processes"]["process_info"]`` is
    always a list of process records annotated by ``add_user``.

    Returns:
        dict: The ``nvidia_smi_log`` mapping with a normalized ``"gpu"`` list
        (empty when the report contains no GPU element).

    Raises:
        OSError: If the ``nvidia-smi`` executable cannot be started.
        RuntimeError: If ``nvidia-smi`` did not produce XML output; the
            message carries its exit status and its own error text.
    """
    sp = subprocess.Popen(
        ["nvidia-smi", "-q", "-x"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout, stderr = sp.communicate()
    # When the tool fails it prints a plain-text message instead of XML.
    # Report that message rather than letting the XML parser choke on it.
    if not stdout.lstrip().startswith(b"<"):
        message = (stderr or stdout).decode("utf-8", "replace").strip()
        raise RuntimeError(
            "nvidia-smi failed (exit status {}): {}".format(
                sp.returncode, message
            )
        )

    report = xmltodict.parse(stdout.decode("utf-8"))["nvidia_smi_log"]
    # Round-trip through JSON so the result consists of plain dicts and lists.
    report = json.loads(json.dumps(report))

    # A repeated XML element parses to a list, a single one to a bare value,
    # and an absent/empty one to nothing at all; normalize all three to a list.
    gpus = report.get("gpu") or []
    if not isinstance(gpus, list):
        gpus = [gpus]

    gpu_list = []
    for gpu in gpus:
        processes = gpu.get("processes")
        if not isinstance(processes, dict):
            # Empty element (None) or a placeholder string: no process table.
            processes = {}
        process = processes.get("process_info") or []
        if not isinstance(process, list):
            process = [process]
        processes["process_info"] = add_user(process)
        gpu["processes"] = processes

        gpu_list.append(simplify(gpu))
    report["gpu"] = gpu_list
    return report


def _leading_int(text):
    """Return the integer before the first space of ``text`` (e.g. "123 MiB"),
    or ``None`` when that part is not a number (e.g. "N/A")."""
    try:
        return int(str(text).split(" ")[0])
    except ValueError:
        return None


# Print one table of GPU processes: a header row, then for every GPU a title
# line (index, product name, utilization) followed by one row per process.
_ROW = "    {: <13}\t{: <8}\t{: <20}\t{}"
_RULE = "---------------------------------------------------------"

gpu = get_gpu_info()
print()
print(_ROW.format("user", "pid", "used_memory", "process_name"))
print(_RULE)
for i, g in enumerate(gpu["gpu"]):
    print(
        "{} {} ({}):".format(
            i,
            g["product_name"],
            g["utilization"]["gpu_util"],
        )
    )
    total = _leading_int(g["fb_memory_usage"]["total"])
    for p in g["processes"]["process_info"]:
        used = _leading_int(p["used_memory"])
        if used is None or not total:
            # Memory figure unavailable, or total unknown/zero: a percentage
            # cannot be computed, so show a placeholder instead of crashing.
            share = "N/A"
        else:
            share = "{:5.2f}%".format(100 * used / total)
        print(
            _ROW.format(
                # str() keeps the padded format spec valid for non-string
                # values such as None.
                str(p["user"]),
                p["pid"],
                "{: <10} ({})".format(str(p["used_memory"]), share),
                p["process_name"],
            )
        )
    print(_RULE)
print()

使用效果: