Details
-
Type: Bug
-
Status: Open
-
Priority: Major
-
Resolution: Unresolved
-
Fix Version/s: OpenVZ-legacy, CU-rh6-next-stable
-
Component/s: Containers::Kernel
-
Security Level: Public
Description
I see a container that has a number of processes in R state, and it can't be stopped (all vzctl commands entering the container are also stuck in either R or S state). It appears to be a bug in the CPU fair scheduler.
This is what I see with ps axf (excerpt for a given container):
113329 ? Rs 0:00 init
113416 ? S 0:00 \_ @sbin/plymouthd --mode=boot --attach-to-session
113420 ? Rs 0:00 \_ plymouth-upstart-bridge
113541 ? R 0:00 \_ upstart-udev-bridge --daemon
113572 ? Rs 0:00 \_ /lib/systemd/systemd-udevd --daemon
113605 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113607 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113609 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113610 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113611 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113612 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113613 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113614 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113615 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113616 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113617 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113618 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113791 ? Rsl 0:00 \_ rsyslogd
113853 ? R 0:00 \_ upstart-socket-bridge --daemon
113854 ? R 0:00 \_ upstart-file-bridge --daemon
113968 ? Ss 0:00 \_ /bin/sh -e /proc/self/fd/9
113977 ? S 0:00 | \_ /bin/sh /etc/init.d/rc 2
113992 ? R 0:00 | \_ startpar -v
113972 ? Ss 0:00 \_ /bin/sh -e /proc/self/fd/9
113979 ? S 0:00 | \_ /bin/sh /usr/bin/savelog -q -p -c 5 /var/log/dmesg
113993 ? R 0:00 | \_ /bin/sh /usr/bin/savelog -q -p -c 5 /var/log/dmesg
113978 ? Ss 0:00 \_ /bin/sh -e /proc/self/fd/9
113990 ? R 0:00 | \_ perl -MSocket -e exit (!socket($sock, AF_INET6, SOCK_STREAM, 0))
113988 ? Rs 0:00 \_ /usr/sbin/sshd -D
113989 ? R 0:00 \_ cron
113994 ? R 0:00 \_ init
114022 ? Ss 0:00 bash -s
114023 ? S 0:00 \_ /usr/sbin/vzctl exec 8636601 /bin/bash /CL/user_startup.sh
114024 ? S 0:00 \_ /usr/sbin/vzctl exec 8636601 /bin/bash /CL/user_startup.sh
114025 ? Rs 0:00 \_ /usr/sbin/vzctl exec 8636601 /bin/bash /CL/user_startup.sh
127102 ? Ss 0:00 bash -s
127108 ? S 0:00 \_ /usr/sbin/vzctl exec 8636601 sh /CL/hooks/startup.sh
127110 ? S 0:00 \_ /usr/sbin/vzctl exec 8636601 sh /CL/hooks/startup.sh
127112 ? Rs 0:00 \_ sh /CL/hooks/startup.sh
457179 ? Ss 0:00 bash -s
457180 ? S 0:00 \_ /usr/sbin/vzctl stop 8636601 --fast
457181 ? Rs 0:00 \_ /usr/sbin/vzctl stop 8636601 --fast
[root@ip-10-0-136-154 ~]# grep . /proc/vz/fairsched/8636601/*
grep: /proc/vz/fairsched/8636601/cgroup.event_control: Invalid argument
/proc/vz/fairsched/8636601/cgroup.procs:113329
/proc/vz/fairsched/8636601/cgroup.procs:113342
/proc/vz/fairsched/8636601/cgroup.procs:113343
/proc/vz/fairsched/8636601/cgroup.procs:113416
/proc/vz/fairsched/8636601/cgroup.procs:113420
/proc/vz/fairsched/8636601/cgroup.procs:113541
/proc/vz/fairsched/8636601/cgroup.procs:113572
/proc/vz/fairsched/8636601/cgroup.procs:113605
/proc/vz/fairsched/8636601/cgroup.procs:113607
/proc/vz/fairsched/8636601/cgroup.procs:113609
/proc/vz/fairsched/8636601/cgroup.procs:113610
/proc/vz/fairsched/8636601/cgroup.procs:113611
/proc/vz/fairsched/8636601/cgroup.procs:113612
/proc/vz/fairsched/8636601/cgroup.procs:113613
/proc/vz/fairsched/8636601/cgroup.procs:113614
/proc/vz/fairsched/8636601/cgroup.procs:113615
/proc/vz/fairsched/8636601/cgroup.procs:113616
/proc/vz/fairsched/8636601/cgroup.procs:113617
/proc/vz/fairsched/8636601/cgroup.procs:113618
/proc/vz/fairsched/8636601/cgroup.procs:113791
/proc/vz/fairsched/8636601/cgroup.procs:113853
/proc/vz/fairsched/8636601/cgroup.procs:113854
/proc/vz/fairsched/8636601/cgroup.procs:113968
/proc/vz/fairsched/8636601/cgroup.procs:113972
/proc/vz/fairsched/8636601/cgroup.procs:113977
/proc/vz/fairsched/8636601/cgroup.procs:113978
/proc/vz/fairsched/8636601/cgroup.procs:113979
/proc/vz/fairsched/8636601/cgroup.procs:113988
/proc/vz/fairsched/8636601/cgroup.procs:113989
/proc/vz/fairsched/8636601/cgroup.procs:113990
/proc/vz/fairsched/8636601/cgroup.procs:113992
/proc/vz/fairsched/8636601/cgroup.procs:113993
/proc/vz/fairsched/8636601/cgroup.procs:113994
/proc/vz/fairsched/8636601/cgroup.procs:114025
/proc/vz/fairsched/8636601/cgroup.procs:127112
/proc/vz/fairsched/8636601/cgroup.procs:457181
/proc/vz/fairsched/8636601/cpu.acct.stat:user 49
/proc/vz/fairsched/8636601/cpu.acct.stat:system 99
/proc/vz/fairsched/8636601/cpuacct.stat:user 28
/proc/vz/fairsched/8636601/cpuacct.stat:system 79
/proc/vz/fairsched/8636601/cpuacct.usage:1424285878
/proc/vz/fairsched/8636601/cpuacct.usage_percpu:32559490 65155948 38548719 34689812 298588686 71440936 155604905 322195033 11822043 58254471 627682 3369031 131425347 72407377 80158418 47437980
/proc/vz/fairsched/8636601/cpu.cfs_period_us:100000
/proc/vz/fairsched/8636601/cpu.cfs_quota_us:200000
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_count 5281
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_delay 2950918210
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_run_real 1515769568
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_run_virtual 1536481790
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_scaled_run_real 1515769568
/proc/vz/fairsched/8636601/cpu.delayacct.total:blkio_count 670
/proc/vz/fairsched/8636601/cpu.delayacct.total:blkio_delay 490350103
/proc/vz/fairsched/8636601/cpu.delayacct.total:swapin_count 0
/proc/vz/fairsched/8636601/cpu.delayacct.total:swapin_delay 0
/proc/vz/fairsched/8636601/cpu.delayacct.total:freepages_count 0
/proc/vz/fairsched/8636601/cpu.delayacct.total:freepages_delay 0
/proc/vz/fairsched/8636601/cpu.effective_shares:10
/proc/vz/fairsched/8636601/cpu.min_shares_pct:0
/proc/vz/fairsched/8636601/cpu.nr_cpus:3
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu 49 0 99 4817296035 15 0 0 1
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu0 0 0 2 401441052 3 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu1 2 0 4 100 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu2 1 0 2 105 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu3 0 0 2 401441055 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu4 10 0 20 401441028 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu5 1 0 5 401441052 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu6 6 0 9 401441043 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu7 14 0 20 401441022 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu8 0 0 0 401441057 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu9 2 0 3 401441053 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu10 0 0 0 401441059 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu11 0 0 0 132 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu12 4 0 9 401441044 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu13 1 0 5 3124 2 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu14 1 0 6 401441049 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu15 1 0 3 401441053 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:intr 0
/proc/vz/fairsched/8636601/cpu.proc.stat:swap 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:ctxt 5865
/proc/vz/fairsched/8636601/cpu.proc.stat:btime 1450621331
/proc/vz/fairsched/8636601/cpu.proc.stat:processes 552
/proc/vz/fairsched/8636601/cpu.proc.stat:procs_running 16
/proc/vz/fairsched/8636601/cpu.proc.stat:procs_blocked 0
/proc/vz/fairsched/8636601/cpu.rate:2048
/proc/vz/fairsched/8636601/cpu.rt_period_us:1000000
/proc/vz/fairsched/8636601/cpu.rt_runtime_us:0
/proc/vz/fairsched/8636601/cpuset.cpu_exclusive:0
/proc/vz/fairsched/8636601/cpuset.mem_exclusive:0
/proc/vz/fairsched/8636601/cpuset.mem_hardwall:0
/proc/vz/fairsched/8636601/cpuset.memory_migrate:0
/proc/vz/fairsched/8636601/cpuset.memory_pressure:0
/proc/vz/fairsched/8636601/cpuset.memory_spread_page:0
/proc/vz/fairsched/8636601/cpuset.memory_spread_slab:0
/proc/vz/fairsched/8636601/cpuset.sched_load_balance:1
/proc/vz/fairsched/8636601/cpuset.sched_relax_domain_level:-1
/proc/vz/fairsched/8636601/cpu.shares:10
/proc/vz/fairsched/8636601/cpu.stat:nr_periods 10
/proc/vz/fairsched/8636601/cpu.stat:nr_throttled 2
/proc/vz/fairsched/8636601/cpu.stat:throttled_time 175470984510032
/proc/vz/fairsched/8636601/cpu.usage:1531033835
/proc/vz/fairsched/8636601/cpu.usage_percpu:34559019 77244062 39466310 36837065 313469281 74914322 162812168 356696719 13677880 61049948 1596529 4467892 143299934 73079428 86401287 51461991
/proc/vz/fairsched/8636601/notify_on_release:0
/proc/vz/fairsched/8636601/self_destruction:0
/proc/vz/fairsched/8636601/tasks:113329
/proc/vz/fairsched/8636601/tasks:113342
/proc/vz/fairsched/8636601/tasks:113343
/proc/vz/fairsched/8636601/tasks:113416
/proc/vz/fairsched/8636601/tasks:113420
/proc/vz/fairsched/8636601/tasks:113541
/proc/vz/fairsched/8636601/tasks:113572
/proc/vz/fairsched/8636601/tasks:113605
/proc/vz/fairsched/8636601/tasks:113607
/proc/vz/fairsched/8636601/tasks:113609
/proc/vz/fairsched/8636601/tasks:113610
/proc/vz/fairsched/8636601/tasks:113611
/proc/vz/fairsched/8636601/tasks:113612
/proc/vz/fairsched/8636601/tasks:113613
/proc/vz/fairsched/8636601/tasks:113614
/proc/vz/fairsched/8636601/tasks:113615
/proc/vz/fairsched/8636601/tasks:113616
/proc/vz/fairsched/8636601/tasks:113617
/proc/vz/fairsched/8636601/tasks:113618
/proc/vz/fairsched/8636601/tasks:113791
/proc/vz/fairsched/8636601/tasks:113808
/proc/vz/fairsched/8636601/tasks:113809
/proc/vz/fairsched/8636601/tasks:113853
/proc/vz/fairsched/8636601/tasks:113854
/proc/vz/fairsched/8636601/tasks:113968
/proc/vz/fairsched/8636601/tasks:113972
/proc/vz/fairsched/8636601/tasks:113977
/proc/vz/fairsched/8636601/tasks:113978
/proc/vz/fairsched/8636601/tasks:113979
/proc/vz/fairsched/8636601/tasks:113988
/proc/vz/fairsched/8636601/tasks:113989
/proc/vz/fairsched/8636601/tasks:113990
/proc/vz/fairsched/8636601/tasks:113992
/proc/vz/fairsched/8636601/tasks:113993
/proc/vz/fairsched/8636601/tasks:113994
/proc/vz/fairsched/8636601/tasks:114025
/proc/vz/fairsched/8636601/tasks:127112
/proc/vz/fairsched/8636601/tasks:457181
[root@ip-10-0-136-154 ~]# cat /proc/fairsched
Version: 2.6 debug
veid id parent weight rate tasks run cpus flg ready start_tag value delay
0 0 0 500 0 88 1 16 .. 1 0 0 0
0 2147483647 0 500 0 88 1 16 .. 1 0 0 0
0 1017 0 500 0 32 0 16 .. 0 0 0 0
1017 0 1017 500 0 32 0 16 .. 0 0 0 0
1017 1 0 500 0 32 0 16 .. 0 0 0 0
0 5577166 0 51200 2048 38 9 16 L. 9 0 0 0
5577166 0 5577166 51200 2048 38 9 16 L. 9 0 0 0
5577166 1 0 51200 2048 38 9 16 L. 9 0 0 0
0 7348695 0 51200 2048 39 13 16 L. 13 0 0 0
7348695 0 7348695 51200 2048 39 13 16 L. 13 0 0 0
7348695 1 0 51200 2048 39 13 16 L. 13 0 0 0
0 8636601 0 51200 2048 38 16 16 L. 16 0 0 0
8636601 0 8636601 51200 2048 38 16 16 L. 16 0 0 0
8636601 1 0 51200 2048 38 16 16 L. 16 0 0 0
0 9085576 0 51200 2048 32 8 16 L. 8 0 0 0
9085576 0 9085576 51200 2048 32 8 16 L. 8 0 0 0
9085576 1 0 51200 2048 32 8 16 L. 8 0 0 0
0 1019 0 500 0 97 0 16 .. 0 0 0 0
1019 0 1019 500 0 97 0 16 .. 0 0 0 0
1019 1 0 500 0 97 0 16 .. 0 0 0 0
0 10584713 0 500 4096 74 0 16 L. 0 0 0 0
10584713 0 10584713 500 4096 74 0 16 L. 0 0 0 0
10584713 1 0 500 4096 74 0 16 L. 0 0 0 0
[root@ip-10-0-136-154 ~]# cat /proc/fairsched2
Version: 2.7 debug
id weight rate run cpus flg ready start_tag value delay
2147483647 500 0 1 16 .. 1 0 0 0
1017 500 0 0 16 .. 0 0 0 0
5577166 51200 2048 9 16 L. 9 0 0 0
7348695 51200 2048 13 16 L. 13 0 0 0
8636601 51200 2048 16 16 L. 16 0 0 0
9085576 51200 2048 8 16 L. 8 0 0 0
1019 500 0 0 16 .. 0 0 0 0
10584713 500 4096 0 16 L. 0 0 0 0
This is CT config:
# RAM
PHYSPAGES="0:65536"
# Swap
SWAPPAGES="0:65536"
# Disk quota parameters (in form of softlimit:hardlimit)
DISKSPACE="10485760:10485760"
DISKINODES="200000:220000"
QUOTATIME="0"
# CPU fair scheduler parameter
CPUUNITS="10"
VE_ROOT="/vz/root/$VEID"
VE_PRIVATE="/vz/private/$VEID"
OSTEMPLATE="ubuntu-14.04-x86_64"
ORIGIN_SAMPLE="vswap-256m"
CPUS="3"
HOSTNAME="xxxxxxxxx"
NAMESERVER="54.200.68.81"
SKIP_ARPDETECT=yes
IOLIMIT="10485760"
VE_LAYOUT="ploop"
CPULIMIT="200"
IP_ADDRESS="x.x.x.x"
Here are this and other containers' cpuunits and cpulimits:
[root@ip-10-0-136-154 ~]# vzlist -o ctid,cpuunits,cpulimit,status -a
CTID CPUUNI CPULIM STATUS
1017 1000 0 running
1019 1000 0 running
5577166 10 200 running
6229918 10 0 stopped
7348695 10 200 running
8636601 10 200 running
9085576 10 200 running
10584713 1000 400 running
Environment:
[root@ip-10-0-136-154 ~]# uname -a
Linux ip-10-0-136-154 2.6.32-042stab108.8 #1 SMP Wed Jul 22 17:23:23 MSK 2015 x86_64 x86_64 x86_64 GNU/Linux
[root@ip-10-0-136-154 ~]# rpm -q vzctl
vzctl-4.9.4-1.x86_64
[root@ip-10-0-136-154 ~]# uptime
01:40:23 up 127 days, 3:01, 3 users, load average: 0.25, 0.36, 0.35
NOTE: this was observed on a number of containers and machines; I am just using this CT as an example for the report.
I found a workaround:
[root@ip-10-0-136-154 ~]# vzctl set 8636601 --cpulimit 0
Locked by: pid 457180, cmdline /usr/sbin/vzctl stop 8636601 --fast
Container already locked
[root@ip-10-0-136-154 ~]# vzctl --skiplock set 8636601 --cpulimit 0
UB limits were set successfully
Setting CPU limit: 0
WARNING: Settings were not saved to config and will be lost after CT restart (use --save flag)
[root@ip-10-0-136-154 ~]# vzctl status 8636601
CTID 8636601 exist unmounted down
A similar bug was reported on the users@ mailing list: https://lists.openvz.org/pipermail/users/2016-February/006740.html
This coincidence makes me suspect some kind of time-related integer overflow.
This is what I see with ps axf (excerpt for a given container):
113329 ? Rs 0:00 init
113416 ? S 0:00 \_ @sbin/plymouthd --mode=boot --attach-to-session
113420 ? Rs 0:00 \_ plymouth-upstart-bridge
113541 ? R 0:00 \_ upstart-udev-bridge --daemon
113572 ? Rs 0:00 \_ /lib/systemd/systemd-udevd --daemon
113605 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113607 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113609 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113610 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113611 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113612 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113613 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113614 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113615 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113616 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113617 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113618 ? S 0:00 | \_ /lib/systemd/systemd-udevd --daemon
113791 ? Rsl 0:00 \_ rsyslogd
113853 ? R 0:00 \_ upstart-socket-bridge --daemon
113854 ? R 0:00 \_ upstart-file-bridge --daemon
113968 ? Ss 0:00 \_ /bin/sh -e /proc/self/fd/9
113977 ? S 0:00 | \_ /bin/sh /etc/init.d/rc 2
113992 ? R 0:00 | \_ startpar -v
113972 ? Ss 0:00 \_ /bin/sh -e /proc/self/fd/9
113979 ? S 0:00 | \_ /bin/sh /usr/bin/savelog -q -p -c 5 /var/log/dmesg
113993 ? R 0:00 | \_ /bin/sh /usr/bin/savelog -q -p -c 5 /var/log/dmesg
113978 ? Ss 0:00 \_ /bin/sh -e /proc/self/fd/9
113990 ? R 0:00 | \_ perl -MSocket -e exit (!socket($sock, AF_INET6, SOCK_STREAM, 0))
113988 ? Rs 0:00 \_ /usr/sbin/sshd -D
113989 ? R 0:00 \_ cron
113994 ? R 0:00 \_ init
114022 ? Ss 0:00 bash -s
114023 ? S 0:00 \_ /usr/sbin/vzctl exec 8636601 /bin/bash /CL/user_startup.sh
114024 ? S 0:00 \_ /usr/sbin/vzctl exec 8636601 /bin/bash /CL/user_startup.sh
114025 ? Rs 0:00 \_ /usr/sbin/vzctl exec 8636601 /bin/bash /CL/user_startup.sh
127102 ? Ss 0:00 bash -s
127108 ? S 0:00 \_ /usr/sbin/vzctl exec 8636601 sh /CL/hooks/startup.sh
127110 ? S 0:00 \_ /usr/sbin/vzctl exec 8636601 sh /CL/hooks/startup.sh
127112 ? Rs 0:00 \_ sh /CL/hooks/startup.sh
457179 ? Ss 0:00 bash -s
457180 ? S 0:00 \_ /usr/sbin/vzctl stop 8636601 --fast
457181 ? Rs 0:00 \_ /usr/sbin/vzctl stop 8636601 --fast
[root@ip-10-0-136-154 ~]# grep . /proc/vz/fairsched/8636601/*
grep: /proc/vz/fairsched/8636601/cgroup.event_control: Invalid argument
/proc/vz/fairsched/8636601/cgroup.procs:113329
/proc/vz/fairsched/8636601/cgroup.procs:113342
/proc/vz/fairsched/8636601/cgroup.procs:113343
/proc/vz/fairsched/8636601/cgroup.procs:113416
/proc/vz/fairsched/8636601/cgroup.procs:113420
/proc/vz/fairsched/8636601/cgroup.procs:113541
/proc/vz/fairsched/8636601/cgroup.procs:113572
/proc/vz/fairsched/8636601/cgroup.procs:113605
/proc/vz/fairsched/8636601/cgroup.procs:113607
/proc/vz/fairsched/8636601/cgroup.procs:113609
/proc/vz/fairsched/8636601/cgroup.procs:113610
/proc/vz/fairsched/8636601/cgroup.procs:113611
/proc/vz/fairsched/8636601/cgroup.procs:113612
/proc/vz/fairsched/8636601/cgroup.procs:113613
/proc/vz/fairsched/8636601/cgroup.procs:113614
/proc/vz/fairsched/8636601/cgroup.procs:113615
/proc/vz/fairsched/8636601/cgroup.procs:113616
/proc/vz/fairsched/8636601/cgroup.procs:113617
/proc/vz/fairsched/8636601/cgroup.procs:113618
/proc/vz/fairsched/8636601/cgroup.procs:113791
/proc/vz/fairsched/8636601/cgroup.procs:113853
/proc/vz/fairsched/8636601/cgroup.procs:113854
/proc/vz/fairsched/8636601/cgroup.procs:113968
/proc/vz/fairsched/8636601/cgroup.procs:113972
/proc/vz/fairsched/8636601/cgroup.procs:113977
/proc/vz/fairsched/8636601/cgroup.procs:113978
/proc/vz/fairsched/8636601/cgroup.procs:113979
/proc/vz/fairsched/8636601/cgroup.procs:113988
/proc/vz/fairsched/8636601/cgroup.procs:113989
/proc/vz/fairsched/8636601/cgroup.procs:113990
/proc/vz/fairsched/8636601/cgroup.procs:113992
/proc/vz/fairsched/8636601/cgroup.procs:113993
/proc/vz/fairsched/8636601/cgroup.procs:113994
/proc/vz/fairsched/8636601/cgroup.procs:114025
/proc/vz/fairsched/8636601/cgroup.procs:127112
/proc/vz/fairsched/8636601/cgroup.procs:457181
/proc/vz/fairsched/8636601/cpu.acct.stat:user 49
/proc/vz/fairsched/8636601/cpu.acct.stat:system 99
/proc/vz/fairsched/8636601/cpuacct.stat:user 28
/proc/vz/fairsched/8636601/cpuacct.stat:system 79
/proc/vz/fairsched/8636601/cpuacct.usage:1424285878
/proc/vz/fairsched/8636601/cpuacct.usage_percpu:32559490 65155948 38548719 34689812 298588686 71440936 155604905 322195033 11822043 58254471 627682 3369031 131425347 72407377 80158418 47437980
/proc/vz/fairsched/8636601/cpu.cfs_period_us:100000
/proc/vz/fairsched/8636601/cpu.cfs_quota_us:200000
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_count 5281
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_delay 2950918210
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_run_real 1515769568
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_run_virtual 1536481790
/proc/vz/fairsched/8636601/cpu.delayacct.total:cpu_scaled_run_real 1515769568
/proc/vz/fairsched/8636601/cpu.delayacct.total:blkio_count 670
/proc/vz/fairsched/8636601/cpu.delayacct.total:blkio_delay 490350103
/proc/vz/fairsched/8636601/cpu.delayacct.total:swapin_count 0
/proc/vz/fairsched/8636601/cpu.delayacct.total:swapin_delay 0
/proc/vz/fairsched/8636601/cpu.delayacct.total:freepages_count 0
/proc/vz/fairsched/8636601/cpu.delayacct.total:freepages_delay 0
/proc/vz/fairsched/8636601/cpu.effective_shares:10
/proc/vz/fairsched/8636601/cpu.min_shares_pct:0
/proc/vz/fairsched/8636601/cpu.nr_cpus:3
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu 49 0 99 4817296035 15 0 0 1
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu0 0 0 2 401441052 3 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu1 2 0 4 100 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu2 1 0 2 105 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu3 0 0 2 401441055 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu4 10 0 20 401441028 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu5 1 0 5 401441052 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu6 6 0 9 401441043 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu7 14 0 20 401441022 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu8 0 0 0 401441057 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu9 2 0 3 401441053 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu10 0 0 0 401441059 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu11 0 0 0 132 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu12 4 0 9 401441044 0 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu13 1 0 5 3124 2 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu14 1 0 6 401441049 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:cpu15 1 0 3 401441053 1 0 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:intr 0
/proc/vz/fairsched/8636601/cpu.proc.stat:swap 0 0
/proc/vz/fairsched/8636601/cpu.proc.stat:ctxt 5865
/proc/vz/fairsched/8636601/cpu.proc.stat:btime 1450621331
/proc/vz/fairsched/8636601/cpu.proc.stat:processes 552
/proc/vz/fairsched/8636601/cpu.proc.stat:procs_running 16
/proc/vz/fairsched/8636601/cpu.proc.stat:procs_blocked 0
/proc/vz/fairsched/8636601/cpu.rate:2048
/proc/vz/fairsched/8636601/cpu.rt_period_us:1000000
/proc/vz/fairsched/8636601/cpu.rt_runtime_us:0
/proc/vz/fairsched/8636601/cpuset.cpu_exclusive:0
/proc/vz/fairsched/8636601/cpuset.mem_exclusive:0
/proc/vz/fairsched/8636601/cpuset.mem_hardwall:0
/proc/vz/fairsched/8636601/cpuset.memory_migrate:0
/proc/vz/fairsched/8636601/cpuset.memory_pressure:0
/proc/vz/fairsched/8636601/cpuset.memory_spread_page:0
/proc/vz/fairsched/8636601/cpuset.memory_spread_slab:0
/proc/vz/fairsched/8636601/cpuset.sched_load_balance:1
/proc/vz/fairsched/8636601/cpuset.sched_relax_domain_level:-1
/proc/vz/fairsched/8636601/cpu.shares:10
/proc/vz/fairsched/8636601/cpu.stat:nr_periods 10
/proc/vz/fairsched/8636601/cpu.stat:nr_throttled 2
/proc/vz/fairsched/8636601/cpu.stat:throttled_time 175470984510032
/proc/vz/fairsched/8636601/cpu.usage:1531033835
/proc/vz/fairsched/8636601/cpu.usage_percpu:34559019 77244062 39466310 36837065 313469281 74914322 162812168 356696719 13677880 61049948 1596529 4467892 143299934 73079428 86401287 51461991
/proc/vz/fairsched/8636601/notify_on_release:0
/proc/vz/fairsched/8636601/self_destruction:0
/proc/vz/fairsched/8636601/tasks:113329
/proc/vz/fairsched/8636601/tasks:113342
/proc/vz/fairsched/8636601/tasks:113343
/proc/vz/fairsched/8636601/tasks:113416
/proc/vz/fairsched/8636601/tasks:113420
/proc/vz/fairsched/8636601/tasks:113541
/proc/vz/fairsched/8636601/tasks:113572
/proc/vz/fairsched/8636601/tasks:113605
/proc/vz/fairsched/8636601/tasks:113607
/proc/vz/fairsched/8636601/tasks:113609
/proc/vz/fairsched/8636601/tasks:113610
/proc/vz/fairsched/8636601/tasks:113611
/proc/vz/fairsched/8636601/tasks:113612
/proc/vz/fairsched/8636601/tasks:113613
/proc/vz/fairsched/8636601/tasks:113614
/proc/vz/fairsched/8636601/tasks:113615
/proc/vz/fairsched/8636601/tasks:113616
/proc/vz/fairsched/8636601/tasks:113617
/proc/vz/fairsched/8636601/tasks:113618
/proc/vz/fairsched/8636601/tasks:113791
/proc/vz/fairsched/8636601/tasks:113808
/proc/vz/fairsched/8636601/tasks:113809
/proc/vz/fairsched/8636601/tasks:113853
/proc/vz/fairsched/8636601/tasks:113854
/proc/vz/fairsched/8636601/tasks:113968
/proc/vz/fairsched/8636601/tasks:113972
/proc/vz/fairsched/8636601/tasks:113977
/proc/vz/fairsched/8636601/tasks:113978
/proc/vz/fairsched/8636601/tasks:113979
/proc/vz/fairsched/8636601/tasks:113988
/proc/vz/fairsched/8636601/tasks:113989
/proc/vz/fairsched/8636601/tasks:113990
/proc/vz/fairsched/8636601/tasks:113992
/proc/vz/fairsched/8636601/tasks:113993
/proc/vz/fairsched/8636601/tasks:113994
/proc/vz/fairsched/8636601/tasks:114025
/proc/vz/fairsched/8636601/tasks:127112
/proc/vz/fairsched/8636601/tasks:457181
[root@ip-10-0-136-154 ~]# cat /proc/fairsched
Version: 2.6 debug
veid id parent weight rate tasks run cpus flg ready start_tag value delay
0 0 0 500 0 88 1 16 .. 1 0 0 0
0 2147483647 0 500 0 88 1 16 .. 1 0 0 0
0 1017 0 500 0 32 0 16 .. 0 0 0 0
1017 0 1017 500 0 32 0 16 .. 0 0 0 0
1017 1 0 500 0 32 0 16 .. 0 0 0 0
0 5577166 0 51200 2048 38 9 16 L. 9 0 0 0
5577166 0 5577166 51200 2048 38 9 16 L. 9 0 0 0
5577166 1 0 51200 2048 38 9 16 L. 9 0 0 0
0 7348695 0 51200 2048 39 13 16 L. 13 0 0 0
7348695 0 7348695 51200 2048 39 13 16 L. 13 0 0 0
7348695 1 0 51200 2048 39 13 16 L. 13 0 0 0
0 8636601 0 51200 2048 38 16 16 L. 16 0 0 0
8636601 0 8636601 51200 2048 38 16 16 L. 16 0 0 0
8636601 1 0 51200 2048 38 16 16 L. 16 0 0 0
0 9085576 0 51200 2048 32 8 16 L. 8 0 0 0
9085576 0 9085576 51200 2048 32 8 16 L. 8 0 0 0
9085576 1 0 51200 2048 32 8 16 L. 8 0 0 0
0 1019 0 500 0 97 0 16 .. 0 0 0 0
1019 0 1019 500 0 97 0 16 .. 0 0 0 0
1019 1 0 500 0 97 0 16 .. 0 0 0 0
0 10584713 0 500 4096 74 0 16 L. 0 0 0 0
10584713 0 10584713 500 4096 74 0 16 L. 0 0 0 0
10584713 1 0 500 4096 74 0 16 L. 0 0 0 0
[root@ip-10-0-136-154 ~]# cat /proc/fairsched2
Version: 2.7 debug
id weight rate run cpus flg ready start_tag value delay
2147483647 500 0 1 16 .. 1 0 0 0
1017 500 0 0 16 .. 0 0 0 0
5577166 51200 2048 9 16 L. 9 0 0 0
7348695 51200 2048 13 16 L. 13 0 0 0
8636601 51200 2048 16 16 L. 16 0 0 0
9085576 51200 2048 8 16 L. 8 0 0 0
1019 500 0 0 16 .. 0 0 0 0
10584713 500 4096 0 16 L. 0 0 0 0
This is CT config:
# RAM
PHYSPAGES="0:65536"
# Swap
SWAPPAGES="0:65536"
# Disk quota parameters (in form of softlimit:hardlimit)
DISKSPACE="10485760:10485760"
DISKINODES="200000:220000"
QUOTATIME="0"
# CPU fair scheduler parameter
CPUUNITS="10"
VE_ROOT="/vz/root/$VEID"
VE_PRIVATE="/vz/private/$VEID"
OSTEMPLATE="ubuntu-14.04-x86_64"
ORIGIN_SAMPLE="vswap-256m"
CPUS="3"
HOSTNAME="xxxxxxxxx"
NAMESERVER="54.200.68.81"
SKIP_ARPDETECT=yes
IOLIMIT="10485760"
VE_LAYOUT="ploop"
CPULIMIT="200"
IP_ADDRESS="x.x.x.x"
Here are this and other containers' cpuunits and cpulimits:
[root@ip-10-0-136-154 ~]# vzlist -o ctid,cpuunits,cpulimit,status -a
CTID CPUUNI CPULIM STATUS
1017 1000 0 running
1019 1000 0 running
5577166 10 200 running
6229918 10 0 stopped
7348695 10 200 running
8636601 10 200 running
9085576 10 200 running
10584713 1000 400 running
Environment:
[root@ip-10-0-136-154 ~]# uname -a
Linux ip-10-0-136-154 2.6.32-042stab108.8 #1 SMP Wed Jul 22 17:23:23 MSK 2015 x86_64 x86_64 x86_64 GNU/Linux
[root@ip-10-0-136-154 ~]# rpm -q vzctl
vzctl-4.9.4-1.x86_64
[root@ip-10-0-136-154 ~]# uptime
01:40:23 up 127 days, 3:01, 3 users, load average: 0.25, 0.36, 0.35
NOTE: this was observed on a number of containers and machines; I am just using this CT as an example for the report.
I found a workaround:
[root@ip-10-0-136-154 ~]# vzctl set 8636601 --cpulimit 0
Locked by: pid 457180, cmdline /usr/sbin/vzctl stop 8636601 --fast
Container already locked
[root@ip-10-0-136-154 ~]# vzctl --skiplock set 8636601 --cpulimit 0
UB limits were set successfully
Setting CPU limit: 0
WARNING: Settings were not saved to config and will be lost after CT restart (use --save flag)
[root@ip-10-0-136-154 ~]# vzctl status 8636601
CTID 8636601 exist unmounted down
A similar bug was reported on the users@ mailing list: https://lists.openvz.org/pipermail/users/2016-February/006740.html
This coincidence makes me suspect some kind of time-related integer overflow.