#!/usr/bin/env python3
"""
Script that reports processes with poor/unconfigured nofile ulimits.

When upgrading containerd from 1.7 to 2.2, we get new systemd LimitNOFILE
defaults. Instead of infinity, it is set to 1024:524288.

For processes that set their own nofile ulimit (higher than the default
1024), this is not a problem. For some, like the vernemq:1.13.0 image,
this was a problem.

To fix, we can revert containerd LimitNOFILE to infinity. Or, we can find
the processes and explicitly change their ulimit before upgrading to
containerd 2.2.

This script aids in making an informed decision.

Usage: run without arguments to treat "infinity" (or the value of
/proc/sys/fs/nr_open) as the unconfigured limit; pass ``new`` as the first
argument to treat the new 1024:524288 defaults as the unconfigured limit.
"""
from collections import namedtuple
import os
import sys

# Sentinel for a limit that /proc/<pid>/limits reports as "unlimited".
INFINITY = object()

# containerd 2.2 new default:
SOFT_NEW = 1024
HARD_NEW = 524288

# Warn if process has unchanged ulimit and has >=N fds
WARN_THRESHOLD = 768

# Kernel changes LimitNOFILE=infinity to /proc/sys/fs/nr_open.
# This is read at import time on purpose: without it we cannot tell which
# numeric limit means "unconfigured", so failing early is preferable to
# silently reporting nothing.
with open('/proc/sys/fs/nr_open') as fp:
    SOFT_UNSET = (INFINITY, int(fp.read().strip()))
    HARD_UNSET = SOFT_UNSET

if sys.argv[1:2] == ['new']:  # new system?
    SOFT_UNSET = (SOFT_NEW,)
    HARD_UNSET = (HARD_NEW,)


class ProcessInfo(namedtuple(
        'ProcInfo', 'pid comm exe fd_count soft_n hard_n')):
    """
    Open-file usage and nofile limits of a single process.

    Fields:
        pid: process id, stored exactly as passed to from_pid().
        comm: contents of /proc/<pid>/comm, stripped.
        exe: target of the /proc/<pid>/exe symlink.
        fd_count: number of entries in /proc/<pid>/fd.
        soft_n: soft "Max open files" limit as int, or INFINITY.
        hard_n: hard "Max open files" limit as int, or INFINITY.
    """

    @classmethod
    def from_pid(cls, pid):
        """
        Build a ProcessInfo by reading /proc/<pid>.

        Returns None when the process cannot be inspected: it vanished,
        its files are not readable by us, or it has no exe link.

        Raises AssertionError if /proc/<pid>/limits has no
        "Max open files" line.
        """
        base = f'/proc/{pid}'
        try:
            with open(f'{base}/limits') as fp:
                for line in fp:
                    if line.startswith('Max open files'):
                        parts = line.split()
                        # parts: ['Max', 'open', 'files', soft, hard, 'files']
                        soft = parts[3]
                        hard = parts[4]
                        break
                else:
                    # Explicit raise instead of "assert False": asserts are
                    # stripped under "python -O", which would turn this
                    # into a confusing UnboundLocalError further down.
                    raise AssertionError(
                        f'no "Max open files" line in {base}/limits')

            soft_n = INFINITY if soft == 'unlimited' else int(soft)
            hard_n = INFINITY if hard == 'unlimited' else int(hard)
            fd_count = len(os.listdir(f'{base}/fd'))
            with open(f'{base}/comm') as f:
                comm = f.read().strip()
            exe = os.readlink(f'{base}/exe')
        except (FileNotFoundError, ProcessLookupError, PermissionError):
            return None

        return cls(pid, comm, exe, fd_count, soft_n, hard_n)

    def soft_n_is_unset(self):
        """Return True if the soft limit is one of the SOFT_UNSET values."""
        return self.soft_n in SOFT_UNSET

    def hard_n_is_unset(self):
        """Return True if the hard limit is one of the HARD_UNSET values."""
        return self.hard_n in HARD_UNSET


def main():
    """
    Print every process whose nofile limits look unconfigured while it
    already holds at least WARN_THRESHOLD file descriptors.

    Output is sorted by descending fd count, then descending pid.
    """
    if os.geteuid() != 0:
        print('warning: run as root to inspect all processes',
              file=sys.stderr)

    procs = []
    for entry in os.listdir('/proc'):
        if not entry.isdigit():
            continue

        # Pass the pid as an int so the sort tie-break below is numeric
        # rather than lexicographic ('999' > '1000' as strings).
        procinfo = ProcessInfo.from_pid(int(entry))
        if procinfo is None:
            continue

        # Flag: soft is effectively unchanged (very high / infinity)
        # AND fd count is approaching new 1024 default.
        if (procinfo.soft_n_is_unset() and procinfo.hard_n_is_unset()
                and procinfo.fd_count >= WARN_THRESHOLD):
            procs.append(procinfo)

    if not procs:
        print('No at-risk processes found.')
        return

    # COMM is padded to 15 columns to line up with the rows below.
    print(f'{"PID":>7} {"FDs":>5} {"SOFT":>10} {"COMM":<15} EXE')
    for procinfo in sorted(
            procs, key=(lambda p: (p.fd_count, p.pid)), reverse=True):
        soft_s = (
            'inf' if procinfo.soft_n is INFINITY
            else str(int(procinfo.soft_n)))
        print(
            '{p.pid:>7} {p.fd_count:>5} {soft_s:>10} {p.comm:<15} {p.exe}'
            .format(p=procinfo, soft_s=soft_s))


if __name__ == '__main__':
    main()