#!/usr/bin/env python3
"""
Script that reports processes with poor/unconfigured nofile ulimits.

When upgrading containerd from 1.7 to 2.2, we get new systemd
LimitNOFILE defaults. Instead of infinity, it is set to 1024:524288.

For processes that set their own nofile ulimit (higher than the default 1024),
this is not a problem. For some, like the vernemq:1.13.0 image, this was a
problem.

To fix, we can revert containerd LimitNOFILE to infinity. Or, we can
find the processes and explicitly change their ulimit before upgrading
to containerd 2.2. This script aids in making an informed decision.
"""
from collections import namedtuple
import os
import sys

# Sentinel for a limit that is reported as 'unlimited'.
INFINITY = object()

# Defaults that come with containerd 2.2 (LimitNOFILE=1024:524288):
SOFT_NEW, HARD_NEW = 1024, 524288
# Only processes holding at least this many fds while still on the
# unchanged ulimit are reported.
WARN_THRESHOLD = 768

# LimitNOFILE=infinity gets turned into the value of
# /proc/sys/fs/nr_open, so both count as "nobody configured a limit".
with open('/proc/sys/fs/nr_open') as nr_open_fp:
    SOFT_UNSET = HARD_UNSET = (INFINITY, int(nr_open_fp.read().strip()))

# First argument 'new': this system already has the new defaults, so
# those are the values that mean "unset" here.
if sys.argv[1:] and sys.argv[1] == 'new':
    SOFT_UNSET, HARD_UNSET = (SOFT_NEW,), (HARD_NEW,)


class ProcessInfo(namedtuple(
        'ProcInfo', 'pid comm exe fd_count soft_n hard_n')):
    """
    Snapshot of one process' open-file usage and its nofile ulimit.

    Fields:
        pid: the pid exactly as it was passed to from_pid()
        comm: contents of /proc/<pid>/comm, stripped
        exe: target of the /proc/<pid>/exe symlink
        fd_count: number of entries in /proc/<pid>/fd
        soft_n, hard_n: soft/hard 'Max open files' limit as int, or
            INFINITY when /proc/<pid>/limits says 'unlimited'
    """
    @classmethod
    def from_pid(cls, pid):
        """
        Build a ProcessInfo from the procfs entries of the given pid.

        Returns None when reading any of the procfs entries raises
        FileNotFoundError, ProcessLookupError or PermissionError (for
        instance because the process is gone or we may not inspect it).

        Raises AssertionError if /proc/<pid>/limits has no
        'Max open files' line.
        """
        base = f'/proc/{pid}'
        try:
            with open(f'{base}/limits') as fp:
                for line in fp:
                    if line.startswith('Max open files'):
                        parts = line.split()
                        # parts: ['Max', 'open', 'files', soft, hard, 'files']
                        soft = parts[3]
                        hard = parts[4]
                        break
                else:
                    # Raise explicitly: a bare 'assert False' is removed
                    # under 'python -O' and we would then trip over the
                    # unbound soft/hard below instead.
                    raise AssertionError(
                        f'no "Max open files" line in {base}/limits')
            soft_n = INFINITY if soft == 'unlimited' else int(soft)
            hard_n = INFINITY if hard == 'unlimited' else int(hard)
            fd_count = len(os.listdir(f'{base}/fd'))
            with open(f'{base}/comm') as f:
                comm = f.read().strip()
            exe = os.readlink(f'{base}/exe')
        except (FileNotFoundError, ProcessLookupError, PermissionError):
            return None

        return cls(pid, comm, exe, fd_count, soft_n, hard_n)

    def soft_n_is_unset(self):
        """Return True if the soft limit is one of the SOFT_UNSET values."""
        return self.soft_n in SOFT_UNSET

    def hard_n_is_unset(self):
        """Return True if the hard limit is one of the HARD_UNSET values."""
        return self.hard_n in HARD_UNSET


def main():
    """
    Report processes that still run with an unset nofile ulimit while
    holding many file descriptors.

    Walks the numeric entries in /proc, keeps every process whose soft
    and hard limit both look unset and which has at least WARN_THRESHOLD
    open fds, and prints them as a table, highest fd count first. Prints
    a warning to stderr when not running as root.
    """
    if os.geteuid() != 0:
        print('warning: run as root to inspect all processes', file=sys.stderr)

    procs = []
    for entry in os.listdir('/proc'):
        if not entry.isdigit():
            continue
        procinfo = ProcessInfo.from_pid(entry)
        if procinfo is None:
            # Could not be inspected (gone or not permitted).
            continue

        # Flag: both limits still have an "unset" value (nobody picked a
        # ulimit for this process) AND the fd count is at or above the
        # warning threshold.
        if (procinfo.soft_n_is_unset() and procinfo.hard_n_is_unset()
                and procinfo.fd_count >= WARN_THRESHOLD):
            procs.append(procinfo)

    if not procs:
        print('No at-risk processes found.')
        return

    print(f'{"PID":>7}  {"FDs":>5}  {"SOFT":>10}  COMM            EXE')
    # The pid comes from os.listdir() as a string; compare it as a number
    # so that ties in fd_count are ordered numerically, not lexically.
    for procinfo in sorted(
            procs, key=(lambda p: (p.fd_count, int(p.pid))), reverse=True):
        soft_s = (
            'inf' if procinfo.soft_n is INFINITY
            else str(procinfo.soft_n))
        print(
            '{p.pid:>7}  {p.fd_count:>5}  {soft_s:>10}  {p.comm:<15} {p.exe}'
            .format(p=procinfo, soft_s=soft_s))


# Only run the report when executed as a script, not when imported.
if __name__ == '__main__':
    main()
