export OMP_NUM_THREADS=4
export OMP_PROC_BIND=close
export OMP_PLACES=cores
mpirun -np 8 \
--rankfile rankfile.txt \
--report-bindings \
lmp -in in.ST1.MSCDSS -sf omp -pk omp 4
p620@p620:~$ ps -u $USER | grep lmp
115108 pts/1 00:01:46 lmp
115109 pts/1 00:01:47 lmp
115110 pts/1 00:01:47 lmp
115111 pts/1 00:01:47 lmp
115112 pts/1 00:01:47 lmp
115113 pts/1 00:01:47 lmp
115114 pts/1 00:01:47 lmp
115115 pts/1 00:01:46 lmp
p620@p620:~$ taskset -cp 115108
pid 115108's current affinity list: 0
p620@p620:~$ taskset -cp 115109
pid 115109's current affinity list: 4
p620@p620:~$ taskset -cp 115110
pid 115110's current affinity list: 8
p620@p620:~$ taskset -cp 115111
pid 115111's current affinity list: 12
p620@p620:~$ taskset -cp 115112
pid 115112's current affinity list: 16
p620@p620:~$ taskset -cp 115113
pid 115113's current affinity list: 20
p620@p620:~$ taskset -cp 115114
pid 115114's current affinity list: 24
p620@p620:~$ taskset -cp 115115
pid 115115's current affinity list: 28
p620@p620:~$ ps -L -p 115108
PID LWP TTY TIME CMD
115108 115108 pts/1 00:01:43 lmp
115108 115117 pts/1 00:00:00 lmp
115108 115124 pts/1 00:01:43 lmp
115108 115133 pts/1 00:01:43 lmp
115108 115140 pts/1 00:01:43 lmp
p620@p620:~$ ps -L -p 115108 -o pid,tid,psr,comm
PID TID PSR COMMAND
115108 115108 0 lmp
115108 115117 1 lmp
115108 115124 1 lmp
115108 115133 2 lmp
115108 115140 3 lmp
rankfile.txt
rank 0=localhost slot=0-3
rank 1=localhost slot=4-7
rank 2=localhost slot=8-11
rank 3=localhost slot=12-15
rank 4=localhost slot=16-19
rank 5=localhost slot=20-23
rank 6=localhost slot=24-27
rank 7=localhost slot=28-31
Loop time of 697.585 on 32 procs for 100000 steps with 157980 atoms
382.1% CPU use with 8 MPI tasks x 4 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %CPU | %total
-----------------------------------------------------------------------
Pair | 264.97 | 294.77 | 311.38 | 85.0 | 373.8 | 42.26
Bond | 0.064256 | 0.076352 | 0.089049 | 2.8 | 100.0 | 0.01
Neigh | 0.31285 | 0.32609 | 0.33607 | 1.3 | 100.0 | 0.05
Comm | 92.25 | 107.75 | 138.49 | 143.6 | 400.0 | 15.45
Output | 0.06342 | 1.2225 | 8.3039 | 242.3 | 115.3 | 0.18
Modify | 287.01 | 292.54 | 296.85 | 15.4 | 371.1 | 41.94
Other | | 0.8972 | | | | 0.13
Thread timing breakdown (MPI rank 0):
Total threaded time 297 / 42.6%
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 265.69 | 274.57 | 281.72 | 34.7 | 92.46
Bond | 0 | 0 | 0 | 0.0 | 0.00
Neigh | 0.32328 | 0.3247 | 0.32729 | 0.3 | 0.11
Reduce | 14.912 | 22.063 | 31.054 | 123.3 | 7.43
Nlocal: 19747.5 ave 20292 max 19223 min
Histogram: 2 0 0 1 2 1 0 0 0 2
Nghost: 10601.5 ave 10969 max 10033 min
Histogram: 1 0 1 0 0 1 1 1 2 1
Neighs: 966112 ave 1.0139e+06 max 902665 min
Histogram: 1 1 0 1 1 0 0 0 1 3
Total # of neighbors = 7728893
Ave neighs/atom = 48.923237
Ave special neighs/atom = 0
Neighbor list builds = 20
Dangerous builds = 0
System init for write_restart ...
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Last active /omp style is pair_style gran/hertz/history/omp
System init for write_data ...
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Last active /omp style is pair_style gran/hertz/history/omp
Total wall time: 17:05:35
P620
export OMP_NUM_THREADS=8
export OMP_PROC_BIND=close
export OMP_PLACES=cores
mpirun -np 4 \
--map-by rankfile:file=rankfile.txt \
--report-bindings \
lmp -in in.ST1.MSCDSS -sf omp -pk omp 8
mpirun -np 4 --map-by rankfile:file=rankfile.txt --report-bindings lmp -in in.ST1.MSCDSS -sf omp -pk omp 8
Loop time of 990.696 on 32 procs for 100000 steps with 157980 atoms
759.5% CPU use with 4 MPI tasks x 8 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %CPU | %total
-----------------------------------------------------------------------
Pair | 325.89 | 336.74 | 357.73 | 68.7 | 776.8 | 33.99
Bond | 0.070917 | 0.075338 | 0.079716 | 1.2 | 100.0 | 0.01
Neigh | 0.33093 | 0.33582 | 0.33921 | 0.6 | 100.0 | 0.03
Comm | 113.22 | 131.31 | 139.91 | 92.8 | 798.2 | 13.25
Output | 0.13425 | 2.2516 | 8.3586 | 235.0 | 174.7 | 0.23
Modify | 515.39 | 518.99 | 526.73 | 20.1 | 729.3 | 52.39
Other | | 0.9986 | | | | 0.10
Thread timing breakdown (MPI rank 0):
Total threaded time 325.1 / 32.8%
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 272.09 | 283.4 | 288.22 | 29.4 | 87.16
Bond | 0 | 0 | 0 | 0.0 | 0.00
Neigh | 0.29907 | 0.31309 | 0.32277 | 1.2 | 0.10
Reduce | 36.329 | 41.432 | 53.393 | 80.3 | 12.74
Nlocal: 39495 ave 39546 max 39444 min
Histogram: 2 0 0 0 0 0 0 0 0 2
Nghost: 13020.2 ave 13345 max 12721 min
Histogram: 1 0 0 1 0 1 0 0 0 1
Neighs: 1.9546e+06 ave 2.0147e+06 max 1.8712e+06 min
Histogram: 1 0 0 0 1 0 0 0 1 1
Total # of neighbors = 7818411
Ave neighs/atom = 49.489878
Ave special neighs/atom = 0
Neighbor list builds = 20
Dangerous builds = 0
System init for write_restart ...
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Last active /omp style is pair_style gran/hertz/history/omp
System init for write_data ...
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Last active /omp style is pair_style gran/hertz/history/omp
Total wall time: 29:45:53
rankfile.txt
rank 0=localhost slot=0-7
rank 1=localhost slot=8-15
rank 2=localhost slot=16-23
rank 3=localhost slot=24-31
====
For the AMD Ryzen Threadripper PRO 7995WX:
NUMA Nodes
NUMA node(s): 4
NUMA node0 CPU(s): 0-7,32-39,64-71
NUMA node1 CPU(s): 16-23,48-55,80-87
NUMA node2 CPU(s): 24-31,56-63,88-95
NUMA node3 CPU(s): 8-15,40-47,72-79
rankfile
rank 0=localhost slot=0-7
rank 1=localhost slot=32-39
rank 2=localhost slot=64-71
rank 3=localhost slot=16-23
rank 4=localhost slot=48-55
rank 5=localhost slot=80-87
rank 6=localhost slot=24-31
rank 7=localhost slot=56-63
rank 8=localhost slot=88-95
rank 9=localhost slot=8-15
rank 10=localhost slot=40-47
rank 11=localhost slot=72-79
export OMP_NUM_THREADS=8
export OMP_PROC_BIND=close
export OMP_PLACES=cores
mpirun -np 12 --map-by rankfile:file=rankfile_7975wx.txt --report-bindings lmp -in in.ST1_4CM.MSCDSS -sf omp -pk omp 8

'HPC' 카테고리의 다른 글
| Post | Date |
|---|---|
| IPoIB (0) | 2025.09.24 |
| Mellanox Connect-X (0) | 2025.07.08 |
| CPU Pinning and Affinity Check (0) | 2025.06.25 |
| LAMMPS on Intel Xeon (0) | 2025.05.14 |
| LAMMPS Process Mapping in OpenMPI - (3) --map-by ppr:<N>:<resource> (0) | 2025.05.08 |