[chibi@centos8 ~]$ sudo nvidia-docker run --rm -ti nvcr.io/nvidia/tensorflow:19.04-py3
Unable to find image 'nvcr.io/nvidia/tensorflow:19.04-py3' locally
19.04-py3: Pulling from nvidia/tensorflow
34667c7e4631: Pulling fs layer
d18d76a881a4: Pulling fs layer
119c7358fbfc: Pulling fs layer
2aaf13f3eff0: Pulling fs layer
202fa0f8874b: Pulling fs layer
3b700a61ede6: Pulling fs layer
87e6ca450d3f: Waiting
2aaf13f3eff0: Waiting
9b91fa2f9276: Waiting
b5877a9add73: Waiting
bab74df105f1: Waiting
534bbf505504: Waiting
4956bf3bbbb9: Waiting
f4371944c97d: Waiting
4615a735431d: Waiting
5db2639932b5: Waiting
629d5c9d75a4: Waiting
8071b94b5429: Waiting
6eb8eba2ad5a: Waiting
e32e86c15b8b: Pulling fs layer
08db5b51b243: Waiting
f71ce95fb406: Waiting
3498ed8c5685: Waiting
62819d8896c1: Waiting
34bc85bf8bef: Waiting
4a95ca3431c4: Waiting
41bc2d0a4d4d: Waiting
a2ceadc61854: Waiting
2d0c5308ff92: Pulling fs layer
a531832992b8: Waiting
b24a8fd8f2e1: Waiting
8d9313624ab7: Waiting
e5cafe011f22: Pull complete
eca19a329cd4: Pull complete
65ee50af0bcc: Pull complete
5f60ec8c32f4: Pull complete
d7dcb657fa13: Pull complete
1f6ef6575fbe: Pull complete
d1ef346a3015: Pull complete
4ef9cb404fd5: Pull complete
f6797f45a018: Pull complete
1d4380527325: Pull complete
965f2629db02: Pull complete
5debff4c8c0a: Pull complete
b3a3a9d82be6: Pull complete
eac05f20b729: Pull complete
3ce0a7f80167: Pull complete
2a21e34a5784: Pull complete
c1ccf19e258e: Pull complete
0b6ea9d0652b: Pull complete
307bc8c3f024: Pull complete
ca75fd593a79: Pull complete
0cd3cdca1af7: Pull complete
48e857e9d372: Pull complete
3264ea403ca9: Pull complete
Digest: sha256:aaebc136d5d50937362675c77afd908bd96cded68846f39163050a023c8a9851
Status: Downloaded newer image for nvcr.io/nvidia/tensorflow:19.04-py3

================
== TensorFlow ==
================

NVIDIA Release 19.04 (build 6132408)
TensorFlow Version 1.13.1

Container image Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
Copyright 2017-2019 The TensorFlow Authors. All rights reserved.

Various files include modifications (c) NVIDIA CORPORATION. All rights reserved.
NVIDIA modifications are covered by the license terms that apply to the underlying project or file.

NOTE: MOFED driver for multi-node communication was not detected.
      Multi-node communication performance may be reduced.

NOTE: The SHMEM allocation limit is set to the default of 64MB. This may be
      insufficient for TensorFlow. NVIDIA recommends the use of the following flags:
      nvidia-docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 ...
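The SHMEM note in the banner is worth acting on before long training runs. A minimal sketch of the same launch with the recommended flags applied (the flags are copied verbatim from the NOTE above; the -v mount and its host path are a hypothetical addition so the downloaded dataset survives the --rm cleanup):

    sudo nvidia-docker run --rm -ti \
        --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
        -v /home/chibi/datasets:/workspace/data \
        nvcr.io/nvidia/tensorflow:19.04-py3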
root@54fef8265191:/workspace# ls
README.md  docker-examples  nvidia-examples
root@54fef8265191:/workspace# ls
README.md  docker-examples  nvidia-examples
root@54fef8265191:/workspace# cd nvidia-examples
root@54fef8265191:/workspace/nvidia-examples# ls
NCF              bert                 cnn           ssdv1.2
OpenSeq2Seq      big_lstm             gnmt_v2       tensorrt
UNet_Industrial  build_imagenet_data  resnet50v1.5
root@54fef8265191:/workspace/nvidia-examples# cd big_lstm
root@54fef8265191:/workspace/nvidia-examples/big_lstm# ls
1b_word_vocab.txt  data_utils_test.py         language_model_test.py
README.md          download_1b_words_data.sh  model_utils.py
__init__.py        hparams.py                 run_utils.py
common.py          hparams_test.py            single_lm_train.py
data_utils.py      language_model.py          testdata
root@54fef8265191:/workspace/nvidia-examples/big_lstm# ./download_1b_words_data.sh
Please specify root of dataset directory: data
Success: dataset root dir validated
--2020-07-08 18:44:59--  http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
Resolving www.statmt.org (www.statmt.org)... 129.215.197.184
Connecting to www.statmt.org (www.statmt.org)|129.215.197.184|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1792209805 (1.7G) [application/x-gzip]
Saving to: ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’

1-billion-word-lang 100%[===================>]   1.67G   618KB/s    in 82m 49s

2020-07-08 20:07:48 (352 KB/s) - ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’ saved [1792209805/1792209805]

1-billion-word-language-modeling-benchmark-r13output/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00024-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00057-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00055-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00096-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00081-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00033-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00072-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00082-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00018-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00008-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00059-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00005-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00091-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00062-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00031-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00095-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00076-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00006-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00038-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00015-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00087-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00021-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00049-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00009-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00027-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00056-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00046-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00032-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00029-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00088-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00085-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00011-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00012-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00067-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00003-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00093-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00050-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00053-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00044-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00019-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00066-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00028-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00045-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00039-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00071-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00052-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00078-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00037-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00002-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00014-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00048-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00017-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00004-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00077-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00080-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00020-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00051-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00016-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00079-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00043-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00068-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00099-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00064-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00034-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00054-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00040-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00070-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00063-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00041-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00083-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00061-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00073-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00094-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00030-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00060-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00035-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00023-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00042-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00025-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00090-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00089-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00065-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00075-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00022-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00026-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00098-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00084-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00010-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00069-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00013-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00092-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00036-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00097-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00007-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00074-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00001-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00047-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00086-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00058-of-00100
1-billion-word-language-modeling-benchmark-r13output/.svn/
1-billion-word-language-modeling-benchmark-r13output/.svn/tmp/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/de102cd0c91cd19e6612f0840e68a2f20ba8134c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/deed1b75d3bd5cc36ae6aeb85d56680b892b7948.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/86c58db52fbf362c5bc329afc33b8805085fcb0d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/9f2882e21f860a83ad6ea8898ebab140974ed301.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/bcdbc523ee7488dc438cab869b6d5e236578dbfa.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/d2718bc26d0ee0a213d7d4add99a304cb5b39ede.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/c5b24f61479da923123d0394a188da922ea0359c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/116d6ea61730d8199127596b072e981338597779.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/b0e26559cfe641245584a9400b35ba28d64f1411.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/d3ae508e3bcb0e696dd70aecd052410f1f7afc1d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/9e148bd766e8805e0eb97eeae250433ec7a2e996.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/31b645a482e0b81fda3c567cada307c6fcf7ec80.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/da39a3ee5e6b4b0d3255bfef95601890afd80709.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/c1ed42c415ec884e591fb5c70d373da640a383b5.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/e37ba0f85e94073ccaced1eed7e4f5d737a25f49.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/entries
1-billion-word-language-modeling-benchmark-r13output/.svn/format
1-billion-word-language-modeling-benchmark-r13output/.svn/wc.db
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00015-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00031-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00027-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00010-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00033-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00042-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00046-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00037-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00029-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00013-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00002-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00048-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00006-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00030-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00025-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00039-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00008-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00020-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00001-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00034-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00044-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00045-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00016-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00004-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00035-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00038-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00009-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00024-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00022-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00021-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00032-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00011-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00049-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00041-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00019-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00023-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00040-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00014-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00007-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00017-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00012-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00018-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00003-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00028-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en-00000-of-00100
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00043-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00005-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00036-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00026-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00047-of-00050
1-billion-word-language-modeling-benchmark-r13output/README
Success! One billion words dataset ready at:
data/1-billion-word-language-modeling-benchmark-r13output/
Please pass this dir to single_lm_train.py via the --datadir option.
root@54fef8265191:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=4 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

*****HYPER PARAMETERS*****
{'num_gpus': 4, 'num_delayed_steps': 150, 'run_profiler': False, 'num_steps': 20, 'num_shards': 8, 'max_grad_norm': 10.0, 'emb_size': 512, 'optimizer': 0, 'keep_prob': 0.9, 'num_layers': 1, 'average_params': True, 'state_size': 2048, 'num_sampled': 8192, 'do_summaries': False, 'vocab_size': 793470, 'max_time': 180, 'batch_size': 128, 'learning_rate': 0.2, 'projected_size': 512}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1594239936.6153083
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
model/model_2/state_2_0:0 (128, 2560) /gpu:2
model/model_3/state_3_0:0 (128, 2560) /gpu:3
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-07-08 20:25:37.440980: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2994435000 Hz
2020-07-08 20:25:37.442477: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xc4538c0 executing computations on platform Host. Devices:
2020-07-08 20:25:37.442504: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): ,
2020-07-08 20:25:37.953414: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xbfc5940 executing computations on platform CUDA. Devices:
2020-07-08 20:25:37.953444: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-07-08 20:25:37.953451: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-07-08 20:25:37.953456: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-07-08 20:25:37.953462: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-07-08 20:25:37.954515: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:04:00.0
totalMemory: 23.65GiB freeMemory: 23.23GiB
2020-07-08 20:25:37.954552: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.49GiB
2020-07-08 20:25:37.954578: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:41:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-07-08 20:25:37.954604: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:61:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-07-08 20:25:37.954634: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-07-08 20:25:38.779126: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-08 20:25:38.779179: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1 2 3
2020-07-08 20:25:38.779185: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N N N N
2020-07-08 20:25:38.779190: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   N N N N
2020-07-08 20:25:38.779194: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2:   N N N N
2020-07-08 20:25:38.779203: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3:   N N N N
2020-07-08 20:25:38.779361: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22508 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:04:00.0, compute capability: 7.5)
2020-07-08 20:25:38.779705: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22765 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-07-08 20:25:38.780021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10231 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:41:00.0, compute capability: 7.5)
2020-07-08 20:25:38.780163: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10231 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:61:00.0, compute capability: 7.5)
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00017-of-00100
Finished processing!
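At this point TensorFlow has pinned almost all free memory on each card (about 22.5 GB per TITAN RTX and about 10 GB per RTX 2080 Ti, matching the "Created TensorFlow device" lines above). A quick sanity check from a second terminal on the host, using standard nvidia-smi query flags (a sketch, not part of the captured session):

    nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv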
2020-07-08 20:26:03.051763: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1, time = 12.43s, wps = 824, train loss = 12.9869
Iteration 2, time = 9.78s, wps = 1047, train loss = 12.9161
Iteration 3, time = 0.10s, wps = 100936, train loss = 12.8358
Iteration 4, time = 0.09s, wps = 114359, train loss = 11.3511
Iteration 5, time = 0.09s, wps = 111953, train loss = 12.2597
Iteration 6, time = 0.11s, wps = 96299, train loss = 32.9989
Iteration 7, time = 0.10s, wps = 106371, train loss = 13.8583
Iteration 8, time = 0.10s, wps = 102946, train loss = 11.2358
Iteration 9, time = 0.09s, wps = 110590, train loss = 27.0164
Iteration 20, time = 1.01s, wps = 111682, train loss = 10.9667
Iteration 40, time = 1.82s, wps = 112610, train loss = 9.0338
Iteration 60, time = 1.85s, wps = 110711, train loss = 8.7533
Iteration 80, time = 1.83s, wps = 111870, train loss = 8.1069
Iteration 100, time = 1.81s, wps = 113419, train loss = 8.2671
Iteration 120, time = 1.82s, wps = 112610, train loss = 7.3047
Iteration 140, time = 1.86s, wps = 109939, train loss = 6.9182
Iteration 160, time = 1.82s, wps = 112528, train loss = 7.0936
Iteration 180, time = 1.84s, wps = 111458, train loss = 6.5482
Iteration 200, time = 1.83s, wps = 111621, train loss = 6.2659
Iteration 220, time = 1.83s, wps = 111810, train loss = 6.3443
Iteration 240, time = 1.80s, wps = 114077, train loss = 6.2493
Iteration 260, time = 1.82s, wps = 112527, train loss = 6.1710
Iteration 280, time = 1.84s, wps = 111407, train loss = 6.1754
Iteration 300, time = 1.85s, wps = 110426, train loss = 6.0002
Iteration 320, time = 1.88s, wps = 109024, train loss = 6.0322
Iteration 340, time = 1.85s, wps = 110931, train loss = 5.9969
Iteration 360, time = 1.83s, wps = 111713, train loss = 5.9592
Iteration 380, time = 1.83s, wps = 112116, train loss = 5.9195
Iteration 400, time = 1.84s, wps = 111445, train loss = 5.9260
Iteration 420, time = 1.83s, wps = 112218, train loss = 5.8160
Iteration 440, time = 1.83s, wps = 111635, train loss = 5.7989
Iteration 460, time = 1.84s, wps = 111290, train loss = 5.7137
Iteration 480, time = 1.87s, wps = 109649, train loss = 5.7025
Iteration 500, time = 1.84s, wps = 111085, train loss = 5.7013
Iteration 520, time = 1.88s, wps = 109063, train loss = 5.7123
Iteration 540, time = 1.84s, wps = 111191, train loss = 5.6739
Iteration 560, time = 1.85s, wps = 110972, train loss = 5.5763
Iteration 580, time = 1.85s, wps = 110904, train loss = 5.6252
Iteration 600, time = 1.86s, wps = 110218, train loss = 5.5784
Iteration 620, time = 1.83s, wps = 111639, train loss = 5.5929
Iteration 640, time = 1.84s, wps = 111205, train loss = 5.5271
Iteration 660, time = 1.84s, wps = 111477, train loss = 5.4646
Iteration 680, time = 1.85s, wps = 110664, train loss = 5.4808
Iteration 700, time = 1.87s, wps = 109401, train loss = 5.4542
Iteration 720, time = 1.85s, wps = 110561, train loss = 5.4642
Iteration 740, time = 1.83s, wps = 111773, train loss = 5.4285
Iteration 760, time = 1.83s, wps = 112193, train loss = 5.4117
Iteration 780, time = 1.84s, wps = 111018, train loss = 5.3448
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00076-of-00100
Finished processing!
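The steady-state wps figures are consistent with the hyperparameters printed above: each global step consumes batch_size x num_steps words per GPU, each logged interval covers 20 steps, and after the first few warm-up iterations the interval time settles near 1.84 s. A one-line check with values copied from the log (awk sketch):

    awk 'BEGIN { printf "%.0f wps\n", 128 * 20 * 4 * 20 / 1.84 }'
    # -> 111304, in line with the wps column above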
Iteration 800, time = 3.85s, wps = 53237, train loss = 5.3551
Iteration 820, time = 1.87s, wps = 109483, train loss = 5.3774
Iteration 840, time = 1.86s, wps = 110178, train loss = 5.3299
Iteration 860, time = 1.87s, wps = 109592, train loss = 5.3319
Iteration 880, time = 1.84s, wps = 111044, train loss = 5.3090
Iteration 900, time = 1.84s, wps = 111490, train loss = 5.2715
Iteration 920, time = 1.88s, wps = 108968, train loss = 5.2970
Iteration 940, time = 1.86s, wps = 110269, train loss = 5.2309
Iteration 960, time = 1.84s, wps = 111290, train loss = 5.2785
Iteration 980, time = 1.82s, wps = 112243, train loss = 5.2517
Iteration 1000, time = 1.84s, wps = 111352, train loss = 5.2165
Iteration 1020, time = 1.87s, wps = 109777, train loss = 5.2199
Iteration 1040, time = 1.83s, wps = 112213, train loss = 5.2180
Iteration 1060, time = 1.85s, wps = 110665, train loss = 5.1982
Iteration 1080, time = 1.83s, wps = 111886, train loss = 5.2111
Iteration 1100, time = 1.83s, wps = 111717, train loss = 5.2161
Iteration 1120, time = 1.85s, wps = 110655, train loss = 5.1410
Iteration 1140, time = 1.87s, wps = 109311, train loss = 5.1594
Iteration 1160, time = 1.84s, wps = 111067, train loss = 5.2235
Iteration 1180, time = 1.88s, wps = 109048, train loss = 5.1344
Iteration 1200, time = 1.86s, wps = 109871, train loss = 5.0604
Iteration 1220, time = 1.84s, wps = 111567, train loss = 5.0838
Iteration 1240, time = 1.85s, wps = 110833, train loss = 5.0898
Iteration 1260, time = 1.86s, wps = 110090, train loss = 5.0945
Iteration 1280, time = 1.85s, wps = 110782, train loss = 5.1108
Iteration 1300, time = 1.85s, wps = 110981, train loss = 5.1215
Iteration 1320, time = 1.86s, wps = 110262, train loss = 5.0364
Iteration 1340, time = 1.86s, wps = 110162, train loss = 5.0094
Iteration 1360, time = 1.90s, wps = 107890, train loss = 4.9572
Iteration 1380, time = 1.86s, wps = 110316, train loss = 5.0176
Iteration 1400, time = 1.85s, wps = 110554, train loss = 5.0093
Iteration 1420, time = 1.87s, wps = 109489, train loss = 5.0073
Iteration 1440, time = 1.85s, wps = 110699, train loss = 4.9904
Iteration 1460, time = 1.86s, wps = 109881, train loss = 4.9507
Iteration 1480, time = 1.87s, wps = 109738, train loss = 4.9475
Iteration 1500, time = 1.84s, wps = 111165, train loss = 4.9536
Iteration 1520, time = 1.85s, wps = 110925, train loss = 4.9564
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m17.024s
user    15m32.185s
sys     1m30.813s
root@54fef8265191:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=3 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
*****HYPER PARAMETERS*****
{'run_profiler': False, 'num_sampled': 8192, 'state_size': 2048, 'projected_size': 512, 'keep_prob': 0.9, 'num_shards': 8, 'num_delayed_steps': 150, 'average_params': True, 'num_layers': 1, 'vocab_size': 793470, 'batch_size': 128, 'learning_rate': 0.2, 'max_grad_norm': 10.0, 'max_time': 180, 'emb_size': 512, 'num_steps': 20, 'num_gpus': 3, 'optimizer': 0, 'do_summaries': False}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1594241416.3491683
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
model/model_2/state_2_0:0 (128, 2560) /gpu:2
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-07-08 20:50:17.025924: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2994435000 Hz
2020-07-08 20:50:17.027788: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xa561890 executing computations on platform Host. Devices:
2020-07-08 20:50:17.027820: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): ,
2020-07-08 20:50:17.537921: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xa0fc270 executing computations on platform CUDA. Devices:
2020-07-08 20:50:17.537951: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-07-08 20:50:17.537958: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-07-08 20:50:17.537969: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-07-08 20:50:17.537976: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-07-08 20:50:17.539238: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:04:00.0
totalMemory: 23.65GiB freeMemory: 23.22GiB
2020-07-08 20:50:17.539275: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.49GiB
2020-07-08 20:50:17.539302: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:41:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-07-08 20:50:17.539330: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:61:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-07-08 20:50:17.539362: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-07-08 20:50:18.350969: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-08 20:50:18.351022: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1 2 3
2020-07-08 20:50:18.351029: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N N N N
2020-07-08 20:50:18.351035: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   N N N N
2020-07-08 20:50:18.351040: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2:   N N N N
2020-07-08 20:50:18.351048: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3:   N N N N
2020-07-08 20:50:18.351211: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22508 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:04:00.0, compute capability: 7.5)
2020-07-08 20:50:18.351639: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22765 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-07-08 20:50:18.351832: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10231 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:41:00.0, compute capability: 7.5)
2020-07-08 20:50:18.352135: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10231 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:61:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00030-of-00100
Finished processing!
2020-07-08 20:50:34.117424: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1528, time = 9.81s, wps = 783, train loss = 6.6405
Iteration 1529, time = 7.28s, wps = 1055, train loss = 5.0638
Iteration 1530, time = 0.09s, wps = 86838, train loss = 5.0377
Iteration 1531, time = 0.08s, wps = 91882, train loss = 4.9678
Iteration 1532, time = 0.08s, wps = 92417, train loss = 4.8845
Iteration 1533, time = 0.08s, wps = 94769, train loss = 4.8899
Iteration 1534, time = 0.08s, wps = 98750, train loss = 4.8734
Iteration 1535, time = 0.08s, wps = 98003, train loss = 4.8957
Iteration 1536, time = 0.08s, wps = 101376, train loss = 4.9655
Iteration 1547, time = 0.86s, wps = 97822, train loss = 4.9413
Iteration 1567, time = 1.56s, wps = 98773, train loss = 4.9598
Iteration 1587, time = 1.57s, wps = 97591, train loss = 4.9349
Iteration 1607, time = 1.58s, wps = 96952, train loss = 4.8583
Iteration 1627, time = 1.56s, wps = 98646, train loss = 4.8710
Iteration 1647, time = 1.57s, wps = 98040, train loss = 4.9904
Iteration 1667, time = 1.58s, wps = 97250, train loss = 4.9201
Iteration 1687, time = 1.58s, wps = 97106, train loss = 4.8661
Iteration 1707, time = 1.55s, wps = 99022, train loss = 4.8316
Iteration 1727, time = 1.59s, wps = 96626, train loss = 4.9152
Iteration 1747, time = 1.57s, wps = 97890, train loss = 4.9025
Iteration 1767, time = 1.57s, wps = 97542, train loss = 4.8516
Iteration 1787, time = 1.59s, wps = 96577, train loss = 4.8567
Iteration 1807, time = 1.58s, wps = 96916, train loss = 4.9273
Iteration 1827, time = 1.58s, wps = 97062, train loss = 4.8319
Iteration 1847, time = 1.56s, wps = 98623, train loss = 4.8346
Iteration 1867, time = 1.58s, wps = 97266, train loss = 4.8151
Iteration 1887, time = 1.57s, wps = 98013, train loss = 4.8483
Iteration 1907, time = 1.55s, wps = 98780, train loss = 4.8667
Iteration 1927, time = 1.57s, wps = 98030, train loss = 4.8561
Iteration 1947, time = 1.57s, wps = 97926, train loss = 4.8239
Iteration 1967, time = 1.58s, wps = 97007, train loss = 4.7892
Iteration 1987, time = 1.56s, wps = 98439, train loss = 4.8350
Iteration 2007, time = 1.59s, wps = 96554, train loss = 4.8324
Iteration 2027, time = 1.59s, wps = 96702, train loss = 4.7452
Iteration 2047, time = 1.56s, wps = 98393, train loss = 4.8185
Iteration 2067, time = 1.60s, wps = 96135, train loss = 4.7902
Iteration 2087, time = 1.58s, wps = 97402, train loss = 4.7387
Iteration 2107, time = 1.58s, wps = 97032, train loss = 4.7636
Iteration 2127, time = 1.56s, wps = 98154, train loss = 4.7707
Iteration 2147, time = 1.58s, wps = 97395, train loss = 4.7024
Iteration 2167, time = 1.56s, wps = 98434, train loss = 4.7733
Iteration 2187, time = 1.58s, wps = 97295, train loss = 4.8112
Iteration 2207, time = 1.55s, wps = 99139, train loss = 4.7185
Iteration 2227, time = 1.59s, wps = 96844, train loss = 4.7398
Iteration 2247, time = 1.58s, wps = 97083, train loss = 4.7441
Iteration 2267, time = 1.57s, wps = 97833, train loss = 4.7514
Iteration 2287, time = 1.57s, wps = 97725, train loss = 4.7567
Iteration 2307, time = 1.57s, wps = 97978, train loss = 4.7345
Iteration 2327, time = 1.57s, wps = 97793, train loss = 4.7516
Iteration 2347, time = 1.57s, wps = 97793, train loss = 4.6619
Iteration 2367, time = 1.57s, wps = 97599, train loss = 4.7856
Iteration 2387, time = 1.58s, wps = 97288, train loss = 4.7842
Iteration 2407, time = 1.58s, wps = 97311, train loss = 4.6921
Iteration 2427, time = 1.58s, wps = 97130, train loss = 4.7217
Iteration 2447, time = 1.58s, wps = 97193, train loss = 4.7184
Iteration 2467, time = 1.58s, wps = 97443, train loss = 4.7137
Iteration 2487, time = 1.56s, wps = 98691, train loss = 4.7000
Iteration 2507, time = 1.56s, wps = 98570, train loss = 4.6394
Iteration 2527, time = 1.56s, wps = 98629, train loss = 4.6162
Iteration 2547, time = 1.57s, wps = 98102, train loss = 4.6974
Iteration 2567, time = 1.57s, wps = 97621, train loss = 4.6862
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00093-of-00100
Finished processing!
Iteration 2587, time = 3.60s, wps = 42612, train loss = 4.6388
Iteration 2607, time = 1.56s, wps = 98178, train loss = 4.6596
Iteration 2627, time = 1.58s, wps = 97368, train loss = 4.6247
Iteration 2647, time = 1.58s, wps = 97316, train loss = 4.6970
Iteration 2667, time = 1.55s, wps = 98783, train loss = 4.6765
Iteration 2687, time = 1.60s, wps = 95819, train loss = 4.6601
Iteration 2707, time = 1.59s, wps = 96368, train loss = 4.6784
Iteration 2727, time = 1.58s, wps = 97372, train loss = 4.6179
Iteration 2747, time = 1.58s, wps = 97035, train loss = 4.6739
Iteration 2767, time = 1.56s, wps = 98411, train loss = 4.7031
Iteration 2787, time = 1.57s, wps = 97600, train loss = 4.6936
Iteration 2807, time = 1.56s, wps = 98381, train loss = 4.6581
Iteration 2827, time = 1.57s, wps = 98101, train loss = 4.6225
Iteration 2847, time = 1.56s, wps = 98377, train loss = 4.5996
Iteration 2867, time = 1.58s, wps = 97254, train loss = 4.6707
Iteration 2887, time = 1.58s, wps = 96940, train loss = 4.5642
Iteration 2907, time = 1.58s, wps = 97496, train loss = 4.6706
Iteration 2927, time = 1.58s, wps = 97244, train loss = 4.6052
Iteration 2947, time = 1.56s, wps = 98413, train loss = 4.6468
Iteration 2967, time = 1.56s, wps = 98519, train loss = 4.5707
Iteration 2987, time = 1.57s, wps = 97633, train loss = 4.6220
Iteration 3007, time = 1.57s, wps = 97670, train loss = 4.6023
Iteration 3027, time = 1.60s, wps = 96207, train loss = 4.6462
Iteration 3047, time = 1.58s, wps = 97321, train loss = 4.5654
Iteration 3067, time = 1.56s, wps = 98516, train loss = 4.5825
Iteration 3087, time = 1.56s, wps = 98386, train loss = 4.5827
Iteration 3107, time = 1.56s, wps = 98660, train loss = 4.5872
Iteration 3127, time = 1.55s, wps = 99003, train loss = 4.6210
Iteration 3147, time = 1.57s, wps = 98007, train loss = 4.5325
Iteration 3167, time = 1.58s, wps = 97065, train loss = 4.5406
Iteration 3187, time = 1.56s, wps = 98263, train loss = 4.6343
Iteration 3207, time = 1.59s, wps = 96630, train loss = 4.6138
= 4.6138 Iteration 3227, time = 1.59s, wps = 96829, train loss = 4.6093 Iteration 3247, time = 1.59s, wps = 96607, train loss = 4.5742 Iteration 3267, time = 1.55s, wps = 98907, train loss = 4.6552 Iteration 3287, time = 1.57s, wps = 97554, train loss = 4.5890 Iteration 3307, time = 1.59s, wps = 96726, train loss = 4.5771 Iteration 3327, time = 1.58s, wps = 97111, train loss = 4.5454 Iteration 3347, time = 1.58s, wps = 97338, train loss = 4.5239 Iteration 3367, time = 1.57s, wps = 97956, train loss = 4.5262 Iteration 3387, time = 1.57s, wps = 97850, train loss = 4.5259 Iteration 3407, time = 1.57s, wps = 97911, train loss = 4.5427 Iteration 3427, time = 1.58s, wps = 97118, train loss = 4.4896 Iteration 3447, time = 1.57s, wps = 97713, train loss = 4.5674 /usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened. warnings.warn("Attempting to use a closed FileWriter. " real 3m14.972s user 14m27.252s sys 1m17.761s root@54fef8265191:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0. For more information, please see: * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md * https://github.com/tensorflow/addons If you depend on functionality not listed there, please file an issue. *****HYPER PARAMETERS***** {'run_profiler': False, 'num_sampled': 8192, 'keep_prob': 0.9, 'num_delayed_steps': 150, 'emb_size': 512, 'state_size': 2048, 'average_params': True, 'learning_rate': 0.2, 'optimizer': 0, 'num_gpus': 2, 'batch_size': 128, 'projected_size': 512, 'do_summaries': False, 'max_time': 180, 'num_shards': 8, 'num_steps': 20, 'max_grad_norm': 10.0, 'num_layers': 1, 'vocab_size': 793470} ************************** WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version. Instructions for updating: Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
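(Note: the deprecation warnings in this dump each state their own replacement; below is a minimal sketch of the recommended migrations under the TF 1.x API shown above — illustrative only, not part of the benchmark run:)

import tensorflow as tf

x = tf.ones([128, 512])
# dropout: keep_prob -> rate, with rate = 1 - keep_prob (keep_prob=0.9 in the hyperparameters above)
y = tf.nn.dropout(x, rate=1 - 0.9)
# to_float -> tf.cast
z = tf.cast(y, tf.float32)
# UniformUnitScaling -> variance_scaling with a uniform distribution
init = tf.initializers.variance_scaling(distribution="uniform")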
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. Current time: 1594242935.5213037 ALL VARIABLES WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02. Instructions for updating: Please use tf.global_variables instead. model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 model/global_step:0 () model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0 model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0 model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0 model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_b/Adagrad:0 (793470,) /gpu:0 model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0 model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0 TRAINABLE VARIABLES model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 LOCAL VARIABLES model/model/state_0_0:0 (128, 2560) /gpu:0 model/model_1/state_1_0:0 (128, 2560) /gpu:1 WARNING:tensorflow:From 
/opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession 2020-07-08 21:15:36.044983: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2994435000 Hz 2020-07-08 21:15:36.046994: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x964b7d0 executing computations on platform Host. Devices: 2020-07-08 21:15:36.047022: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): , 2020-07-08 21:15:36.560976: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x964b1f0 executing computations on platform CUDA. Devices: 2020-07-08 21:15:36.561011: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): TITAN RTX, Compute Capability 7.5 2020-07-08 21:15:36.561019: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (1): TITAN RTX, Compute Capability 7.5 2020-07-08 21:15:36.561025: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-07-08 21:15:36.561030: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-07-08 21:15:36.562150: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:04:00.0 totalMemory: 23.65GiB freeMemory: 23.22GiB 2020-07-08 21:15:36.562188: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:21:00.0 totalMemory: 23.65GiB freeMemory: 23.49GiB 2020-07-08 21:15:36.562218: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:41:00.0 totalMemory: 10.76GiB freeMemory: 10.61GiB 2020-07-08 21:15:36.562245: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:61:00.0 totalMemory: 10.76GiB freeMemory: 10.61GiB 2020-07-08 21:15:36.562275: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3 2020-07-08 21:15:37.367982: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-07-08 21:15:37.368030: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0 1 2 3 2020-07-08 21:15:37.368037: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N N N N 2020-07-08 21:15:37.368042: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1: N N N N 2020-07-08 21:15:37.368046: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2: N N N N 2020-07-08 21:15:37.368052: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3: N N N N 2020-07-08 21:15:37.368214: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22507 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:04:00.0, compute capability: 7.5) 2020-07-08 21:15:37.368469: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22765 MB memory) 
-> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5) 2020-07-08 21:15:37.368695: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10231 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:41:00.0, compute capability: 7.5) 2020-07-08 21:15:37.368839: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10231 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:61:00.0, compute capability: 7.5) WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file APIs to check for files with this prefix. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file utilities to get mtimes. Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00032-of-00100 Finished processing! 2020-07-08 21:15:49.296730: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally Iteration 3465, time = 6.77s, wps = 756, train loss = 4.7211 Iteration 3466, time = 4.41s, wps = 1161, train loss = 4.6154 Iteration 3467, time = 0.08s, wps = 66494, train loss = 4.6011 Iteration 3468, time = 0.08s, wps = 65567, train loss = 4.5168 Iteration 3469, time = 0.08s, wps = 65980, train loss = 4.5496 Iteration 3470, time = 0.08s, wps = 63503, train loss = 4.5731 Iteration 3471, time = 0.08s, wps = 65882, train loss = 4.4266 Iteration 3472, time = 0.07s, wps = 69564, train loss = 4.4478 Iteration 3473, time = 0.07s, wps = 70211, train loss = 4.5286 Iteration 3484, time = 0.79s, wps = 70895, train loss = 4.5376 Iteration 3504, time = 1.44s, wps = 71353, train loss = 4.5987 Iteration 3524, time = 1.44s, wps = 71138, train loss = 4.5448 Iteration 3544, time = 1.43s, wps = 71445, train loss = 4.5429 Iteration 3564, time = 1.47s, wps = 69895, train loss = 4.5837 Iteration 3584, time = 1.45s, wps = 70861, train loss = 4.5886 Iteration 3604, time = 1.44s, wps = 71228, train loss = 4.4607 Iteration 3624, time = 1.43s, wps = 71501, train loss = 4.4825 Iteration 3644, time = 1.45s, wps = 70463, train loss = 4.4487 Iteration 3664, time = 1.44s, wps = 70953, train loss = 4.4824 Iteration 3684, time = 1.45s, wps = 70823, train loss = 4.5420 Iteration 3704, time = 1.44s, wps = 71046, train loss = 4.5217 Iteration 3724, time = 1.44s, wps = 71002, train loss = 4.4703 Iteration 3744, time = 1.44s, wps = 71328, train loss = 4.5396 Iteration 3764, time = 1.44s, wps = 71188, train loss = 4.4526 Iteration 3784, time = 1.44s, wps = 71284, train loss = 4.4968 Iteration 3804, time = 1.43s, wps = 71432, train loss = 4.4286 Iteration 3824, time = 1.44s, wps = 71175, train loss = 4.5709 Iteration 3844, time = 1.44s, wps = 71188, train loss = 4.4059 Iteration 3864, time = 1.44s, wps = 70914, train loss = 4.5440 Iteration 3884, time = 1.45s, wps = 70827, train loss = 4.4753 Iteration 3904, time = 1.44s, wps = 71068, train loss = 4.5070 Iteration 3924, 
time = 1.44s, wps = 71059, train loss = 4.4773 Iteration 3944, time = 1.45s, wps = 70710, train loss = 4.5267 Iteration 3964, time = 1.46s, wps = 70315, train loss = 4.4760 Iteration 3984, time = 1.45s, wps = 70477, train loss = 4.5708 Iteration 4004, time = 1.45s, wps = 70661, train loss = 4.5337 Iteration 4024, time = 1.44s, wps = 71149, train loss = 4.5090 Iteration 4044, time = 1.44s, wps = 71213, train loss = 4.4792 Iteration 4064, time = 1.44s, wps = 71272, train loss = 4.5567 Iteration 4084, time = 1.46s, wps = 70182, train loss = 4.4019 Iteration 4104, time = 1.45s, wps = 70752, train loss = 4.4502 Iteration 4124, time = 1.43s, wps = 71658, train loss = 4.4771 Iteration 4144, time = 1.44s, wps = 71121, train loss = 4.4634 Iteration 4164, time = 1.43s, wps = 71362, train loss = 4.4410 Iteration 4184, time = 1.44s, wps = 71127, train loss = 4.4459 Iteration 4204, time = 1.46s, wps = 70344, train loss = 4.4494 Iteration 4224, time = 1.44s, wps = 71272, train loss = 4.5417 Iteration 4244, time = 1.44s, wps = 70975, train loss = 4.3760 Iteration 4264, time = 1.44s, wps = 70890, train loss = 4.4919 Iteration 4284, time = 1.45s, wps = 70607, train loss = 4.3863 Iteration 4304, time = 1.44s, wps = 70935, train loss = 4.4387 Iteration 4324, time = 1.45s, wps = 70461, train loss = 4.4748 Iteration 4344, time = 1.44s, wps = 70955, train loss = 4.3912 Iteration 4364, time = 1.44s, wps = 71067, train loss = 4.4607 Iteration 4384, time = 1.45s, wps = 70402, train loss = 4.4438 Iteration 4404, time = 1.45s, wps = 70645, train loss = 4.5261 Iteration 4424, time = 1.45s, wps = 70764, train loss = 4.5004 Iteration 4444, time = 1.46s, wps = 70180, train loss = 4.4643 Iteration 4464, time = 1.46s, wps = 70161, train loss = 4.4385 Iteration 4484, time = 1.45s, wps = 70614, train loss = 4.4957 Iteration 4504, time = 1.45s, wps = 70549, train loss = 4.4585 Iteration 4524, time = 1.45s, wps = 70652, train loss = 4.4897 Iteration 4544, time = 1.43s, wps = 71425, train loss = 4.4154 Iteration 4564, time = 1.46s, wps = 70103, train loss = 4.4231 Iteration 4584, time = 1.45s, wps = 70378, train loss = 4.4420 Iteration 4604, time = 1.44s, wps = 70943, train loss = 4.4806 Iteration 4624, time = 1.46s, wps = 70310, train loss = 4.4674 Iteration 4644, time = 1.46s, wps = 70251, train loss = 4.4735 Iteration 4664, time = 1.44s, wps = 71025, train loss = 4.5010 Iteration 4684, time = 1.45s, wps = 70857, train loss = 4.3788 Iteration 4704, time = 1.45s, wps = 70689, train loss = 4.3390 Iteration 4724, time = 1.44s, wps = 71003, train loss = 4.3767 Iteration 4744, time = 1.43s, wps = 71484, train loss = 4.3683 Iteration 4764, time = 1.45s, wps = 70571, train loss = 4.3105 Iteration 4784, time = 1.46s, wps = 70304, train loss = 4.4008 Iteration 4804, time = 1.45s, wps = 70713, train loss = 4.4823 Iteration 4824, time = 1.45s, wps = 70802, train loss = 4.4164 Iteration 4844, time = 1.44s, wps = 71210, train loss = 4.4463 Iteration 4864, time = 1.45s, wps = 70569, train loss = 4.4747 Iteration 4884, time = 1.45s, wps = 70782, train loss = 4.5030 Iteration 4904, time = 1.45s, wps = 70808, train loss = 4.4497 Iteration 4924, time = 1.45s, wps = 70484, train loss = 4.4401 Iteration 4944, time = 1.46s, wps = 70252, train loss = 4.4126 Iteration 4964, time = 1.45s, wps = 70498, train loss = 4.4095 Iteration 4984, time = 1.45s, wps = 70646, train loss = 4.5218 Iteration 5004, time = 1.44s, wps = 70943, train loss = 4.3385 Iteration 5024, time = 1.45s, wps = 70566, train loss = 4.3568 Processing file: 
./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00022-of-00100 Finished processing! Iteration 5044, time = 3.49s, wps = 29339, train loss = 4.3470 Iteration 5064, time = 1.46s, wps = 70175, train loss = 4.4421 Iteration 5084, time = 1.45s, wps = 70564, train loss = 4.4050 Iteration 5104, time = 1.46s, wps = 70047, train loss = 4.3602 Iteration 5124, time = 1.45s, wps = 70653, train loss = 4.3897 Iteration 5144, time = 1.44s, wps = 71028, train loss = 4.3944 Iteration 5164, time = 1.45s, wps = 70854, train loss = 4.3717 Iteration 5184, time = 1.45s, wps = 70510, train loss = 4.2879 Iteration 5204, time = 1.46s, wps = 70304, train loss = 4.4886 Iteration 5224, time = 1.46s, wps = 70365, train loss = 4.3693 Iteration 5244, time = 1.45s, wps = 70511, train loss = 4.3525 Iteration 5264, time = 1.45s, wps = 70727, train loss = 4.3850 Iteration 5284, time = 1.45s, wps = 70427, train loss = 4.4588 Iteration 5304, time = 1.44s, wps = 71185, train loss = 4.3738 Iteration 5324, time = 1.45s, wps = 70792, train loss = 4.3493 Iteration 5344, time = 1.44s, wps = 71104, train loss = 4.3739 Iteration 5364, time = 1.44s, wps = 71039, train loss = 4.3943 Iteration 5384, time = 1.45s, wps = 70864, train loss = 4.4014 Iteration 5404, time = 1.46s, wps = 70312, train loss = 4.3578 Iteration 5424, time = 1.45s, wps = 70612, train loss = 4.4675 Iteration 5444, time = 1.44s, wps = 71045, train loss = 4.3786 Iteration 5464, time = 1.44s, wps = 70952, train loss = 4.3525 Iteration 5484, time = 1.46s, wps = 70313, train loss = 4.4982 Iteration 5504, time = 1.47s, wps = 69661, train loss = 4.4396 Iteration 5524, time = 1.46s, wps = 70093, train loss = 4.3884 Iteration 5544, time = 1.46s, wps = 70279, train loss = 4.3729 Iteration 5564, time = 1.47s, wps = 69849, train loss = 4.4117 Iteration 5584, time = 1.45s, wps = 70652, train loss = 4.3152 Iteration 5604, time = 1.45s, wps = 70450, train loss = 4.3543 Iteration 5624, time = 1.47s, wps = 69602, train loss = 4.3302 Iteration 5644, time = 1.46s, wps = 70210, train loss = 4.4176 Iteration 5664, time = 1.46s, wps = 70226, train loss = 4.3984 /usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened. warnings.warn("Attempting to use a closed FileWriter. " real 3m12.932s user 13m42.127s sys 1m6.308s root@54fef8265191:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=1 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0. For more information, please see: * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md * https://github.com/tensorflow/addons If you depend on functionality not listed there, please file an issue.
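(Note: to compare these runs quantitatively, the steady-state throughput can be pulled straight out of a saved log. A small sketch follows; the log filename and warm-up skip count are hypothetical:)

import re
import statistics

WPS = re.compile(r"wps = (\d+)")

def mean_wps(path, skip=5):
    # Average the "wps = N" readings, skipping the first few warm-up
    # iterations where the input pipeline is still filling.
    with open(path) as log:
        readings = [int(m.group(1)) for line in log for m in WPS.finditer(line)]
    return statistics.mean(readings[skip:])

print(mean_wps("train_2gpu.log"))  # the --num_gpus=2 run above hovers around 70-71k wps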
*****HYPER PARAMETERS***** {'state_size': 2048, 'run_profiler': False, 'batch_size': 128, 'num_sampled': 8192, 'projected_size': 512, 'num_delayed_steps': 150, 'num_gpus': 1, 'num_steps': 20, 'optimizer': 0, 'learning_rate': 0.2, 'emb_size': 512, 'max_time': 180, 'max_grad_norm': 10.0, 'vocab_size': 793470, 'num_shards': 8, 'do_summaries': False, 'keep_prob': 0.9, 'num_layers': 1, 'average_params': True} ************************** WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version. Instructions for updating: Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. Current time: 1594244160.2038493 ALL VARIABLES WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02. Instructions for updating: Please use tf.global_variables instead. 
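(Note: as with the earlier warnings, the fix here is mechanical; a one-line sketch of the rename the warning asks for:)

import tensorflow as tf

# all_variables (deprecated after 2017-03-02) -> global_variables
params = tf.global_variables()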
model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 model/global_step:0 () model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0 model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0 model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0 model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_b/Adagrad:0 (793470,) /gpu:0 model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0 model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0 TRAINABLE VARIABLES model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 LOCAL VARIABLES model/model/state_0_0:0 (128, 2560) /gpu:0 WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession 2020-07-08 21:36:00.457879: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2994435000 Hz 2020-07-08 21:36:00.459869: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x6ec80e0 executing computations on platform Host. 
Devices: 2020-07-08 21:36:00.459901: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): , 2020-07-08 21:36:00.965615: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x6ea3cc0 executing computations on platform CUDA. Devices: 2020-07-08 21:36:00.965645: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): TITAN RTX, Compute Capability 7.5 2020-07-08 21:36:00.965651: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (1): TITAN RTX, Compute Capability 7.5 2020-07-08 21:36:00.965658: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-07-08 21:36:00.965664: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-07-08 21:36:00.966741: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:04:00.0 totalMemory: 23.65GiB freeMemory: 23.22GiB 2020-07-08 21:36:00.966867: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:21:00.0 totalMemory: 23.65GiB freeMemory: 23.49GiB 2020-07-08 21:36:00.966967: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:41:00.0 totalMemory: 10.76GiB freeMemory: 10.61GiB 2020-07-08 21:36:00.967005: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:61:00.0 totalMemory: 10.76GiB freeMemory: 10.61GiB 2020-07-08 21:36:00.967046: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3 2020-07-08 21:36:01.786023: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-07-08 21:36:01.786078: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0 1 2 3 2020-07-08 21:36:01.786088: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N N N N 2020-07-08 21:36:01.786095: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1: N N N N 2020-07-08 21:36:01.786102: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2: N N N N 2020-07-08 21:36:01.786109: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3: N N N N 2020-07-08 21:36:01.786283: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22507 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:04:00.0, compute capability: 7.5) 2020-07-08 21:36:01.786622: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22765 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5) 2020-07-08 21:36:01.786938: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10231 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:41:00.0, compute capability: 7.5) 2020-07-08 21:36:01.787139: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device 
(/job:localhost/replica:0/task:0/device:GPU:3 with 10231 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:61:00.0, compute capability: 7.5) WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file APIs to check for files with this prefix. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file utilities to get mtimes. Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00096-of-00100 Finished processing! 2020-07-08 21:36:10.433515: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally Iteration 5667, time = 4.36s, wps = 587, train loss = 4.7073 Iteration 5668, time = 2.17s, wps = 1180, train loss = 4.4845 Iteration 5669, time = 0.06s, wps = 41821, train loss = 4.2376 Iteration 5670, time = 0.06s, wps = 43831, train loss = 4.3336 Iteration 5671, time = 0.06s, wps = 43080, train loss = 4.4564 Iteration 5672, time = 0.06s, wps = 43732, train loss = 4.3242 Iteration 5673, time = 0.06s, wps = 44095, train loss = 4.3350 Iteration 5674, time = 0.05s, wps = 47805, train loss = 4.4088 Iteration 5675, time = 0.05s, wps = 49841, train loss = 4.5052 Iteration 5686, time = 0.55s, wps = 51280, train loss = 4.3955 Iteration 5706, time = 1.02s, wps = 50347, train loss = 4.4134 Iteration 5726, time = 1.02s, wps = 50170, train loss = 4.3475 Iteration 5746, time = 1.02s, wps = 49980, train loss = 4.3185 Iteration 5766, time = 1.02s, wps = 50220, train loss = 4.5059 Iteration 5786, time = 1.03s, wps = 49545, train loss = 4.3327 Iteration 5806, time = 1.02s, wps = 50340, train loss = 4.4152 Iteration 5826, time = 1.01s, wps = 50505, train loss = 4.3965 Iteration 5846, time = 1.03s, wps = 49877, train loss = 4.3117 Iteration 5866, time = 1.02s, wps = 50261, train loss = 4.4503 Iteration 5886, time = 1.03s, wps = 49888, train loss = 4.4332 Iteration 5906, time = 1.02s, wps = 49999, train loss = 4.3964 Iteration 5926, time = 1.02s, wps = 49967, train loss = 4.3849 Iteration 5946, time = 1.02s, wps = 50183, train loss = 4.5111 Iteration 5966, time = 1.02s, wps = 50103, train loss = 4.4020 Iteration 5986, time = 1.02s, wps = 50152, train loss = 4.5202 Iteration 6006, time = 1.02s, wps = 50434, train loss = 4.3150 Iteration 6026, time = 1.03s, wps = 49492, train loss = 4.4371 Iteration 6046, time = 1.02s, wps = 50154, train loss = 4.3737 Iteration 6066, time = 1.03s, wps = 49634, train loss = 4.3898 Iteration 6086, time = 1.02s, wps = 50040, train loss = 4.2683 Iteration 6106, time = 1.02s, wps = 50060, train loss = 4.3172 Iteration 6126, time = 1.02s, wps = 50094, train loss = 4.3707 Iteration 6146, time = 1.04s, wps = 49151, train loss = 4.4241 Iteration 6166, time = 1.02s, wps = 50018, train loss = 4.3458 Iteration 6186, time = 1.02s, wps = 50283, train loss = 4.3331 Iteration 6206, time = 1.04s, wps = 49355, train loss = 4.3132 Iteration 6226, time = 1.02s, wps = 49967, train loss = 4.3651 Iteration 6246, time = 1.03s, wps = 49866, train loss = 4.3908 Iteration 6266, time = 1.04s, wps = 49346, train loss = 4.4169 
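(Note: the TRAINABLE VARIABLES listing above fully determines the model size; tallying the printed shapes — 8 embedding shards, one LSTM cell, 8 softmax shards, and the softmax bias — gives about 0.82 billion parameters:)

from functools import reduce
from operator import mul

# Shapes copied verbatim from the TRAINABLE VARIABLES dump above.
shapes = ([(99184, 512)] * 8                      # emb_0 .. emb_7
          + [(1024, 8192), (8192,), (2048, 512)]  # LSTMCell W_0, B, W_P_0
          + [(99184, 512)] * 8                    # softmax_w_0 .. softmax_w_7
          + [(793470,)])                          # softmax_b
total = sum(reduce(mul, s, 1) for s in shapes)
print("{:,} trainable parameters".format(total))  # 822,754,174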
Iteration 6286, time = 1.02s, wps = 50056, train loss = 4.5224 Iteration 6306, time = 1.03s, wps = 49826, train loss = 4.2484 Iteration 6326, time = 1.03s, wps = 49892, train loss = 4.4051 Iteration 6346, time = 1.01s, wps = 50477, train loss = 4.4333 Iteration 6366, time = 1.02s, wps = 50248, train loss = 4.3204 Iteration 6386, time = 1.03s, wps = 49653, train loss = 4.4293 Iteration 6406, time = 1.03s, wps = 49852, train loss = 4.4988 Iteration 6426, time = 1.03s, wps = 49685, train loss = 4.3940 Iteration 6446, time = 1.04s, wps = 49253, train loss = 4.3912 Iteration 6466, time = 1.03s, wps = 49798, train loss = 4.4170 Iteration 6486, time = 1.03s, wps = 49940, train loss = 4.3682 Iteration 6506, time = 1.03s, wps = 49942, train loss = 4.3021 Iteration 6526, time = 1.03s, wps = 49788, train loss = 4.3269 Iteration 6546, time = 1.03s, wps = 49590, train loss = 4.3205 Iteration 6566, time = 1.04s, wps = 49312, train loss = 4.3260 Iteration 6586, time = 1.03s, wps = 49683, train loss = 4.5030 Iteration 6606, time = 1.03s, wps = 49562, train loss = 4.4274 Iteration 6626, time = 1.03s, wps = 49947, train loss = 4.3394 Iteration 6646, time = 1.03s, wps = 49832, train loss = 4.2911 Iteration 6666, time = 1.02s, wps = 50031, train loss = 4.3244 Iteration 6686, time = 1.02s, wps = 50263, train loss = 4.3826 Iteration 6706, time = 1.03s, wps = 49671, train loss = 4.3167 Iteration 6726, time = 1.03s, wps = 49558, train loss = 4.2390 Iteration 6746, time = 1.03s, wps = 49819, train loss = 4.3225 Iteration 6766, time = 1.03s, wps = 49848, train loss = 4.5187 Iteration 6786, time = 1.04s, wps = 49122, train loss = 4.2542 Iteration 6806, time = 1.03s, wps = 49862, train loss = 4.3144 Iteration 6826, time = 1.04s, wps = 49416, train loss = 4.4573 Iteration 6846, time = 1.03s, wps = 49900, train loss = 4.3348 Iteration 6866, time = 1.03s, wps = 49520, train loss = 4.3248 Iteration 6886, time = 1.04s, wps = 49377, train loss = 4.4215 Iteration 6906, time = 1.03s, wps = 49776, train loss = 4.2702 Iteration 6926, time = 1.03s, wps = 49656, train loss = 4.2068 Iteration 6946, time = 1.04s, wps = 49116, train loss = 4.2652 Iteration 6966, time = 1.03s, wps = 49613, train loss = 4.3025 Iteration 6986, time = 1.03s, wps = 49585, train loss = 4.3752 Iteration 7006, time = 1.03s, wps = 49591, train loss = 4.4308 Iteration 7026, time = 1.02s, wps = 50257, train loss = 4.3755 Iteration 7046, time = 1.03s, wps = 49791, train loss = 4.3189 Iteration 7066, time = 1.04s, wps = 49467, train loss = 4.4692 Iteration 7086, time = 1.06s, wps = 48360, train loss = 4.2969 Iteration 7106, time = 1.03s, wps = 49694, train loss = 4.4586 Iteration 7126, time = 1.03s, wps = 49593, train loss = 4.3040 Iteration 7146, time = 1.04s, wps = 49316, train loss = 4.3485 Iteration 7166, time = 1.03s, wps = 49629, train loss = 4.3384 Iteration 7186, time = 1.03s, wps = 49572, train loss = 4.3837 Iteration 7206, time = 1.04s, wps = 49284, train loss = 4.3827 Iteration 7226, time = 1.04s, wps = 49077, train loss = 4.2236 Iteration 7246, time = 1.04s, wps = 49236, train loss = 4.3304 Iteration 7266, time = 1.04s, wps = 49218, train loss = 4.3288 Iteration 7286, time = 1.03s, wps = 49603, train loss = 4.2247 Iteration 7306, time = 1.03s, wps = 49690, train loss = 4.4405 Iteration 7326, time = 1.03s, wps = 49925, train loss = 4.2696 Iteration 7346, time = 1.04s, wps = 49215, train loss = 4.3522 Iteration 7366, time = 1.04s, wps = 49158, train loss = 4.1918 Iteration 7386, time = 1.04s, wps = 49372, train loss = 4.3559 Iteration 7406, time = 
1.03s, wps = 49761, train loss = 4.3906 Iteration 7426, time = 1.04s, wps = 49156, train loss = 4.2687 Iteration 7446, time = 1.04s, wps = 49328, train loss = 4.3105 Iteration 7466, time = 1.05s, wps = 48572, train loss = 4.1850 Iteration 7486, time = 1.04s, wps = 49067, train loss = 4.2436 Iteration 7506, time = 1.03s, wps = 49511, train loss = 4.2716 Iteration 7526, time = 1.03s, wps = 49594, train loss = 4.3282 Iteration 7546, time = 1.03s, wps = 49625, train loss = 4.3498 Iteration 7566, time = 1.04s, wps = 49149, train loss = 4.4008 Iteration 7586, time = 1.05s, wps = 48556, train loss = 4.2463 Iteration 7606, time = 1.04s, wps = 49237, train loss = 4.3397 Iteration 7626, time = 1.03s, wps = 49657, train loss = 4.3862 Iteration 7646, time = 1.04s, wps = 49096, train loss = 4.2831 Iteration 7666, time = 1.04s, wps = 49366, train loss = 4.1310 Iteration 7686, time = 1.04s, wps = 49362, train loss = 4.3583 Iteration 7706, time = 1.04s, wps = 49155, train loss = 4.4280 Iteration 7726, time = 1.02s, wps = 50153, train loss = 4.3875 Iteration 7746, time = 1.06s, wps = 48516, train loss = 4.2223 Iteration 7766, time = 1.05s, wps = 48740, train loss = 4.3217 Iteration 7786, time = 1.04s, wps = 49309, train loss = 4.2568 Iteration 7806, time = 1.06s, wps = 48420, train loss = 4.2596 Iteration 7826, time = 1.04s, wps = 49227, train loss = 4.4196 Iteration 7846, time = 1.04s, wps = 49006, train loss = 4.3124 Iteration 7866, time = 1.05s, wps = 48811, train loss = 4.2791 Iteration 7886, time = 1.04s, wps = 49100, train loss = 4.3356 Iteration 7906, time = 1.04s, wps = 49204, train loss = 4.2871 Iteration 7926, time = 1.06s, wps = 48383, train loss = 4.2935 Iteration 7946, time = 1.05s, wps = 48573, train loss = 4.3577 Iteration 7966, time = 1.04s, wps = 49023, train loss = 4.4071 Iteration 7986, time = 1.07s, wps = 48071, train loss = 4.3009 Iteration 8006, time = 1.05s, wps = 48684, train loss = 4.2772 Iteration 8026, time = 1.05s, wps = 48561, train loss = 4.4013 Iteration 8046, time = 1.03s, wps = 49534, train loss = 4.3527 Iteration 8066, time = 1.05s, wps = 48819, train loss = 4.4304 Iteration 8086, time = 1.07s, wps = 47853, train loss = 4.2904 Iteration 8106, time = 1.07s, wps = 48038, train loss = 4.3153 Iteration 8126, time = 1.08s, wps = 47533, train loss = 4.2469 Iteration 8146, time = 1.08s, wps = 47535, train loss = 4.3476 Iteration 8166, time = 1.08s, wps = 47427, train loss = 4.4838 Iteration 8186, time = 1.07s, wps = 47654, train loss = 4.2386 Iteration 8206, time = 1.06s, wps = 48127, train loss = 4.3112 Iteration 8226, time = 1.07s, wps = 47891, train loss = 4.3244 Iteration 8246, time = 1.06s, wps = 48369, train loss = 4.3758 Iteration 8266, time = 1.08s, wps = 47517, train loss = 4.2632 Iteration 8286, time = 1.07s, wps = 47631, train loss = 4.2796 Iteration 8306, time = 1.08s, wps = 47360, train loss = 4.3039 Iteration 8326, time = 1.08s, wps = 47264, train loss = 4.3975 Iteration 8346, time = 1.08s, wps = 47226, train loss = 4.3885 Iteration 8366, time = 1.10s, wps = 46725, train loss = 4.4229 Iteration 8386, time = 1.09s, wps = 47181, train loss = 4.2750 Iteration 8406, time = 1.07s, wps = 47747, train loss = 4.3592 Iteration 8426, time = 1.04s, wps = 49105, train loss = 4.1747 Iteration 8446, time = 1.14s, wps = 45025, train loss = 4.2834 Iteration 8466, time = 1.11s, wps = 45931, train loss = 4.3156 Iteration 8486, time = 1.10s, wps = 46463, train loss = 4.2480 Iteration 8506, time = 1.08s, wps = 47318, train loss = 4.1515 Iteration 8526, time = 1.09s, wps = 46872, train 
loss = 4.2296 Iteration 8546, time = 1.07s, wps = 47862, train loss = 4.3057 Iteration 8566, time = 1.09s, wps = 46957, train loss = 4.3398 Iteration 8586, time = 1.11s, wps = 46175, train loss = 4.3026 Iteration 8606, time = 1.13s, wps = 45376, train loss = 4.2207 Iteration 8626, time = 1.12s, wps = 45907, train loss = 4.3014 Iteration 8646, time = 1.08s, wps = 47210, train loss = 4.3404 Iteration 8666, time = 1.09s, wps = 46800, train loss = 4.2883 Iteration 8686, time = 1.10s, wps = 46557, train loss = 4.3363 Iteration 8706, time = 1.10s, wps = 46639, train loss = 4.2886 Iteration 8726, time = 1.09s, wps = 46802, train loss = 4.2553 Iteration 8746, time = 1.10s, wps = 46740, train loss = 4.2363 Iteration 8766, time = 1.12s, wps = 45599, train loss = 4.1765 Iteration 8786, time = 1.09s, wps = 46882, train loss = 4.4243 Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00015-of-00100 Finished processing! Iteration 8806, time = 3.13s, wps = 16368, train loss = 4.2971 Iteration 8826, time = 1.07s, wps = 47988, train loss = 4.2746 /usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened. warnings.warn("Attempting to use a closed FileWriter. " real 3m10.744s user 6m23.204s sys 0m43.298s root@54fef8265191:/workspace/nvidia-examples/big_lstm# cat /etc/os-release NAME="Ubuntu" VERSION="16.04.6 LTS (Xenial Xerus)" ID=ubuntu ID_LIKE=debian PRETTY_NAME="Ubuntu 16.04.6 LTS" VERSION_ID="16.04" HOME_URL="http://www.ubuntu.com/" SUPPORT_URL="http://help.ubuntu.com/" BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/" VERSION_CODENAME=xenial UBUNTU_CODENAME=xenial root@54fef8265191:/workspace/nvidia-examples/big_lstm# nvcc -V nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2019 NVIDIA Corporation Built on Fri_Feb__8_19:08:17_PST_2019 Cuda compilation tools, release 10.1, V10.1.105 root@54fef8265191:/workspace/nvidia-examples/big_lstm# cd data root@54fef8265191:/workspace/nvidia-examples/big_lstm/data# ls 1-billion-word-language-modeling-benchmark-r13output root@54fef8265191:/workspace/nvidia-examples/big_lstm/data# cd 1-billion-word-language-modeling-benchmark-r13output root@54fef8265191:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# ls 1b_word_vocab.txt heldout-monolingual.tokenized.shuffled README training-monolingual.tokenized.shuffled root@54fef8265191:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# cd training-monolingual.tokenized.shuffled root@54fef8265191:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# ls news.en-00001-of-00100 news.en-00034-of-00100 news.en-00067-of-00100 news.en-00002-of-00100 news.en-00035-of-00100 news.en-00068-of-00100 news.en-00003-of-00100 news.en-00036-of-00100 news.en-00069-of-00100 news.en-00004-of-00100 news.en-00037-of-00100 news.en-00070-of-00100 news.en-00005-of-00100 news.en-00038-of-00100 news.en-00071-of-00100 news.en-00006-of-00100 news.en-00039-of-00100 news.en-00072-of-00100 news.en-00007-of-00100 news.en-00040-of-00100 news.en-00073-of-00100 news.en-00008-of-00100 news.en-00041-of-00100 news.en-00074-of-00100 news.en-00009-of-00100 news.en-00042-of-00100 news.en-00075-of-00100 news.en-00010-of-00100 news.en-00043-of-00100 news.en-00076-of-00100 
news.en-00011-of-00100 news.en-00044-of-00100 news.en-00077-of-00100 news.en-00012-of-00100 news.en-00045-of-00100 news.en-00078-of-00100 news.en-00013-of-00100 news.en-00046-of-00100 news.en-00079-of-00100 news.en-00014-of-00100 news.en-00047-of-00100 news.en-00080-of-00100 news.en-00015-of-00100 news.en-00048-of-00100 news.en-00081-of-00100 news.en-00016-of-00100 news.en-00049-of-00100 news.en-00082-of-00100 news.en-00017-of-00100 news.en-00050-of-00100 news.en-00083-of-00100 news.en-00018-of-00100 news.en-00051-of-00100 news.en-00084-of-00100 news.en-00019-of-00100 news.en-00052-of-00100 news.en-00085-of-00100 news.en-00020-of-00100 news.en-00053-of-00100 news.en-00086-of-00100 news.en-00021-of-00100 news.en-00054-of-00100 news.en-00087-of-00100 news.en-00022-of-00100 news.en-00055-of-00100 news.en-00088-of-00100 news.en-00023-of-00100 news.en-00056-of-00100 news.en-00089-of-00100 news.en-00024-of-00100 news.en-00057-of-00100 news.en-00090-of-00100 news.en-00025-of-00100 news.en-00058-of-00100 news.en-00091-of-00100 news.en-00026-of-00100 news.en-00059-of-00100 news.en-00092-of-00100 news.en-00027-of-00100 news.en-00060-of-00100 news.en-00093-of-00100 news.en-00028-of-00100 news.en-00061-of-00100 news.en-00094-of-00100 news.en-00029-of-00100 news.en-00062-of-00100 news.en-00095-of-00100 news.en-00030-of-00100 news.en-00063-of-00100 news.en-00096-of-00100 news.en-00031-of-00100 news.en-00064-of-00100 news.en-00097-of-00100 news.en-00032-of-00100 news.en-00065-of-00100 news.en-00098-of-00100 news.en-00033-of-00100 news.en-00066-of-00100 news.en-00099-of-00100 root@54fef8265191:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# exit exit [chibi@centos8 ~]$ cat /etc/redhat-release CentOS Linux release 8.2.2004 (Core) [chibi@centos8 ~]$ nvcc -V nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2020 NVIDIA Corporation Built on Wed_May__6_19:09:25_PDT_2020 Cuda compilation tools, release 11.0, V11.0.167 Build cuda_11.0_bu.TC445_37.28358933_0 [chibi@centos8 ~]$ sensors k10temp-pci-00c3 Adapter: PCI adapter Tdie: +32.8°C (high = +70.0°C) Tctl: +32.8°C [chibi@centos8 ~]$ sudo nvme list [sudo] password for chibi: Node SN Model Namespace Usage Format FW Rev ---------------- -------------------- ---------------------------------------- --------- -------------------------- ---------------- -------- /dev/nvme0n1 P02938115263 PLEXTOR PX-512M9PeG 1 512.11 GB / 512.11 GB 512 B + 0 B 1.07 [chibi@centos8 ~]$ sudo nvme smart-log /dev/nvme0n1 Smart Log for NVME device:nvme0n1 namespace-id:ffffffff critical_warning : 0 temperature : 57 C available_spare : 100% available_spare_threshold : 0% percentage_used : 0% data_units_read : 620,108 data_units_written : 2,644,808 host_read_commands : 12,524,628 host_write_commands : 16,847,327 controller_busy_time : 588 power_cycles : 70 power_on_hours : 72 unsafe_shutdowns : 16 media_errors : 0 num_err_log_entries : 0 Warning Temperature Time : 0 Critical Composite Temperature Time : 0 Temperature Sensor 1 : 57 C Thermal Management T1 Trans Count : 0 Thermal Management T2 Trans Count : 0 Thermal Management T1 Total Time : 0 Thermal Management T2 Total Time : 0 [chibi@centos8 ~]$ sudo nvme smart-log /dev/nvme0n1 Smart Log for NVME device:nvme0n1 namespace-id:ffffffff critical_warning : 0 temperature : 57 C available_spare : 100% available_spare_threshold : 0% percentage_used : 0% data_units_read : 620,108 data_units_written : 2,644,808 host_read_commands : 12,524,628 host_write_commands :
16,847,342 controller_busy_time : 588 power_cycles : 70 power_on_hours : 72 unsafe_shutdowns : 16 media_errors : 0 num_err_log_entries : 0 Warning Temperature Time : 0 Critical Composite Temperature Time : 0 Temperature Sensor 1 : 57 C Thermal Management T1 Trans Count : 0 Thermal Management T2 Trans Count : 0 Thermal Management T1 Total Time : 0 Thermal Management T2 Total Time : 0 [chibi@centos8 ~]$ nvidia-smi nvlink -c GPU 0: TITAN RTX (UUID: GPU-5a71d61e-f130-637a-b33d-4df555b0ed88) GPU 1: TITAN RTX (UUID: GPU-7fb51c1d-c1e7-35cc-aad7-66971f05ddb7) GPU 2: GeForce RTX 2080 Ti (UUID: GPU-1ac935c2-557f-282e-14e5-3f749ffd63ac) GPU 3: GeForce RTX 2080 Ti (UUID: GPU-13277ce5-e1e9-0cb1-8cee-6c9e6618e774) [chibi@centos8 ~]$ lsmem RANGE SIZE STATE REMOVABLE BLOCK 0x0000000000000000-0x0000000007ffffff 128M online no 0 0x0000000008000000-0x000000002fffffff 640M online yes 1-5 0x0000000030000000-0x0000000037ffffff 128M online no 6 0x0000000038000000-0x000000006fffffff 896M online yes 7-13 0x0000000070000000-0x0000000077ffffff 128M online no 14 0x0000000078000000-0x000000007fffffff 128M online yes 15 0x0000000080000000-0x000000009fffffff 512M online no 16-19 0x0000000100000000-0x0000000107ffffff 128M online no 32 0x0000000108000000-0x000000030fffffff 8.1G online yes 33-97 0x0000000310000000-0x0000000317ffffff 128M online no 98 0x0000000318000000-0x000000031fffffff 128M online yes 99 0x0000000320000000-0x0000000327ffffff 128M online no 100 0x0000000328000000-0x000000038fffffff 1.6G online yes 101-113 0x0000000390000000-0x0000000397ffffff 128M online no 114 0x0000000398000000-0x000000040fffffff 1.9G online yes 115-129 0x0000000410000000-0x0000000417ffffff 128M online no 130 0x0000000418000000-0x000000041fffffff 128M online yes 131 0x0000000420000000-0x0000000427ffffff 128M online no 132 0x0000000428000000-0x00000004a7ffffff 2G online yes 133-148 0x00000004a8000000-0x00000004afffffff 128M online no 149 0x00000004b0000000-0x0000000507ffffff 1.4G online yes 150-160 0x0000000508000000-0x000000050fffffff 128M online no 161 0x0000000510000000-0x0000000587ffffff 1.9G online yes 162-176 0x0000000588000000-0x000000058fffffff 128M online no 177 0x0000000590000000-0x00000005d7ffffff 1.1G online yes 178-186 0x00000005d8000000-0x00000005dfffffff 128M online no 187 0x00000005e0000000-0x00000005ffffffff 512M online yes 188-191 0x0000000600000000-0x0000000607ffffff 128M online no 192 0x0000000608000000-0x0000000617ffffff 256M online yes 193-194 0x0000000618000000-0x000000062fffffff 384M online no 195-197 0x0000000630000000-0x000000063fffffff 256M online yes 198-199 0x0000000640000000-0x0000000647ffffff 128M online no 200 0x0000000648000000-0x000000065fffffff 384M online yes 201-203 0x0000000660000000-0x0000000667ffffff 128M online no 204 0x0000000668000000-0x0000000677ffffff 256M online yes 205-206 0x0000000678000000-0x000000069fffffff 640M online no 207-211 0x00000006a0000000-0x00000006afffffff 256M online yes 212-213 0x00000006b0000000-0x00000006b7ffffff 128M online no 214 0x00000006b8000000-0x00000007b7ffffff 4G online yes 215-246 0x00000007b8000000-0x00000007bfffffff 128M online no 247 0x00000007c0000000-0x00000007cfffffff 256M online yes 248-249 0x00000007d0000000-0x00000007e7ffffff 384M online no 250-252 0x00000007e8000000-0x00000007efffffff 128M online yes 253 0x00000007f0000000-0x00000007ffffffff 256M online no 254-255 0x0000000800000000-0x000000082fffffff 768M online yes 256-261 0x0000000830000000-0x0000000867ffffff 896M online no 262-268 0x0000000868000000-0x0000000affffffff 10.4G online yes 269-351 
0x0000000b00000000-0x0000000b07ffffff 128M online no 352 0x0000000b08000000-0x0000000b3fffffff 896M online yes 353-359 0x0000000b40000000-0x0000000b47ffffff 128M online no 360 0x0000000b48000000-0x0000000b5fffffff 384M online yes 361-363 0x0000000b60000000-0x0000000b67ffffff 128M online no 364 0x0000000b68000000-0x0000000b6fffffff 128M online yes 365 0x0000000b70000000-0x0000000b7fffffff 256M online no 366-367 0x0000000b80000000-0x0000000b97ffffff 384M online yes 368-370 0x0000000b98000000-0x0000000b9fffffff 128M online no 371 0x0000000ba0000000-0x0000000bdfffffff 1G online yes 372-379 0x0000000be0000000-0x0000000be7ffffff 128M online no 380 0x0000000be8000000-0x0000000c07ffffff 512M online yes 381-384 0x0000000c08000000-0x0000000c0fffffff 128M online no 385 0x0000000c10000000-0x0000000c2fffffff 512M online yes 386-389 0x0000000c30000000-0x0000000c37ffffff 128M online no 390 0x0000000c38000000-0x0000000c57ffffff 512M online yes 391-394 0x0000000c58000000-0x0000000c67ffffff 256M online no 395-396 0x0000000c68000000-0x0000000c6fffffff 128M online yes 397 0x0000000c70000000-0x0000000c7fffffff 256M online no 398-399 0x0000000c80000000-0x0000000d2fffffff 2.8G online yes 400-421 0x0000000d30000000-0x0000000d5fffffff 768M online no 422-427 0x0000000d60000000-0x0000000d67ffffff 128M online yes 428 0x0000000d68000000-0x0000000d6fffffff 128M online no 429 0x0000000d70000000-0x0000000df7ffffff 2.1G online yes 430-446 0x0000000df8000000-0x0000000dffffffff 128M online no 447 0x0000000e00000000-0x0000000f87ffffff 6.1G online yes 448-496 0x0000000f88000000-0x0000000f8fffffff 128M online no 497 0x0000000f90000000-0x0000000fafffffff 512M online yes 498-501 0x0000000fb0000000-0x0000000fb7ffffff 128M online no 502 0x0000000fb8000000-0x0000000fc7ffffff 256M online yes 503-504 0x0000000fc8000000-0x0000000fcfffffff 128M online no 505 0x0000000fd0000000-0x0000000fd7ffffff 128M online yes 506 0x0000000fd8000000-0x0000000fe7ffffff 256M online no 507-508 0x0000000fe8000000-0x0000000fefffffff 128M online yes 509 0x0000000ff0000000-0x0000000fffffffff 256M online no 510-511 0x0000001000000000-0x0000001007ffffff 128M online yes 512 0x0000001008000000-0x0000001067ffffff 1.5G online no 513-524 0x0000001068000000-0x00000015f7ffffff 22.3G online yes 525-702 0x00000015f8000000-0x00000015ffffffff 128M online no 703 0x0000001600000000-0x00000016d7ffffff 3.4G online yes 704-730 0x00000016d8000000-0x00000016efffffff 384M online no 731-733 0x00000016f0000000-0x00000016ffffffff 256M online yes 734-735 0x0000001700000000-0x0000001707ffffff 128M online no 736 0x0000001708000000-0x000000177fffffff 1.9G online yes 737-751 0x0000001780000000-0x00000017d7ffffff 1.4G online no 752-762 0x00000017d8000000-0x00000017dfffffff 128M online yes 763 0x00000017e0000000-0x00000017e7ffffff 128M online no 764 0x00000017e8000000-0x00000017f7ffffff 256M online yes 765-766 0x00000017f8000000-0x00000017ffffffff 128M online no 767 0x0000001800000000-0x000000180fffffff 256M online yes 768-769 0x0000001810000000-0x0000001867ffffff 1.4G online no 770-780 0x0000001868000000-0x0000001dffffffff 22.4G online yes 781-959 0x0000001e00000000-0x0000001e07ffffff 128M online no 960 0x0000001e08000000-0x0000001e17ffffff 256M online yes 961-962 0x0000001e18000000-0x0000001e1fffffff 128M online no 963 0x0000001e20000000-0x0000001e77ffffff 1.4G online yes 964-974 0x0000001e78000000-0x0000001eafffffff 896M online no 975-981 0x0000001eb0000000-0x0000001eb7ffffff 128M online yes 982 0x0000001eb8000000-0x0000001ec7ffffff 256M online no 983-984 
0x0000001ec8000000-0x0000001ee7ffffff 512M online yes 985-988 0x0000001ee8000000-0x0000001f07ffffff 512M online no 989-992 0x0000001f08000000-0x0000001f2fffffff 640M online yes 993-997 0x0000001f30000000-0x0000001f4fffffff 512M online no 998-1001 0x0000001f50000000-0x0000001f5fffffff 256M online yes 1002-1003 0x0000001f60000000-0x0000001f67ffffff 128M online no 1004 0x0000001f68000000-0x0000001f7fffffff 384M online yes 1005-1007 0x0000001f80000000-0x0000001f87ffffff 128M online no 1008 0x0000001f88000000-0x0000001f8fffffff 128M online yes 1009 0x0000001f90000000-0x0000001f9fffffff 256M online no 1010-1011 0x0000001fa0000000-0x0000001fa7ffffff 128M online yes 1012 0x0000001fa8000000-0x0000001fd7ffffff 768M online no 1013-1018 0x0000001fd8000000-0x0000001fe7ffffff 256M online yes 1019-1020 0x0000001fe8000000-0x0000001fefffffff 128M online no 1021 0x0000001ff0000000-0x0000001ff7ffffff 128M online yes 1022 0x0000001ff8000000-0x000000205fffffff 1.6G online no 1023-1035 Memory block size: 128M Total online memory: 128G Total offline memory: 0B [chibi@centos8 ~]$ lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 32 On-line CPU(s) list: 0-31 Thread(s) per core: 2 Core(s) per socket: 16 Socket(s): 1 NUMA node(s): 4 Vendor ID: AuthenticAMD CPU family: 23 Model: 49 Model name: AMD EPYC 7302P 16-Core Processor Stepping: 0 CPU MHz: 3283.016 CPU max MHz: 3000.0000 CPU min MHz: 1500.0000 BogoMIPS: 5988.87 Virtualization: AMD-V L1d cache: 32K L1i cache: 32K L2 cache: 512K L3 cache: 16384K NUMA node0 CPU(s): 0-3,16-19 NUMA node1 CPU(s): 4-7,20-23 NUMA node2 CPU(s): 8-11,24-27 NUMA node3 CPU(s): 12-15,28-31 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca [chibi@centos8 ~]$ lstopo Machine (126GB total) + Package L#0 NUMANode L#0 (P#0 31GB) L3 L#0 (16MB) L2 L#0 (512KB) + L1d L#0 (32KB) + L1i L#0 (32KB) + Core L#0 PU L#0 (P#0) PU L#1 (P#16) L2 L#1 (512KB) + L1d L#1 (32KB) + L1i L#1 (32KB) + Core L#1 PU L#2 (P#1) PU L#3 (P#17) L3 L#1 (16MB) L2 L#2 (512KB) + L1d L#2 (32KB) + L1i L#2 (32KB) + Core L#2 PU L#4 (P#2) PU L#5 (P#18) L2 L#3 (512KB) + L1d L#3 (32KB) + L1i L#3 (32KB) + Core L#3 PU L#6 (P#3) PU L#7 (P#19) HostBridge L#0 PCIBridge PCI 10de:1e07 GPU L#0 "renderD128" GPU L#1 "card0" NUMANode L#1 (P#1 31GB) L3 L#2 (16MB) L2 L#4 (512KB) + L1d L#4 (32KB) + L1i L#4 (32KB) + Core L#4 PU L#8 (P#4) PU L#9 (P#20) L2 L#5 (512KB) + L1d L#5 (32KB) + L1i L#5 (32KB) + Core L#5 PU L#10 (P#5) PU L#11 (P#21) L3 L#3 (16MB) L2 L#6 (512KB) + L1d L#6 (32KB) + L1i L#6 (32KB) + Core L#6 PU L#12 (P#6) PU L#13 (P#22) L2 L#7 (512KB) + L1d L#7 (32KB) + L1i L#7 (32KB) + Core L#7 PU L#14 (P#7) PU L#15 (P#23) HostBridge L#2 PCIBridge PCI 10de:1e07 GPU L#2 "card1" GPU L#3 "renderD129" PCIBridge PCI 1022:7901 Block(Other) L#4 "sr0" PCIBridge PCI
[chibi@centos8 ~]$ lstopo
Machine (126GB total) + Package L#0
  NUMANode L#0 (P#0 31GB)
    L3 L#0 (16MB)
      L2 L#0 (512KB) + L1d L#0 (32KB) + L1i L#0 (32KB) + Core L#0
        PU L#0 (P#0)
        PU L#1 (P#16)
      L2 L#1 (512KB) + L1d L#1 (32KB) + L1i L#1 (32KB) + Core L#1
        PU L#2 (P#1)
        PU L#3 (P#17)
    L3 L#1 (16MB)
      L2 L#2 (512KB) + L1d L#2 (32KB) + L1i L#2 (32KB) + Core L#2
        PU L#4 (P#2)
        PU L#5 (P#18)
      L2 L#3 (512KB) + L1d L#3 (32KB) + L1i L#3 (32KB) + Core L#3
        PU L#6 (P#3)
        PU L#7 (P#19)
    HostBridge L#0
      PCIBridge
        PCI 10de:1e07
          GPU L#0 "renderD128"
          GPU L#1 "card0"
  NUMANode L#1 (P#1 31GB)
    L3 L#2 (16MB)
      L2 L#4 (512KB) + L1d L#4 (32KB) + L1i L#4 (32KB) + Core L#4
        PU L#8 (P#4)
        PU L#9 (P#20)
      L2 L#5 (512KB) + L1d L#5 (32KB) + L1i L#5 (32KB) + Core L#5
        PU L#10 (P#5)
        PU L#11 (P#21)
    L3 L#3 (16MB)
      L2 L#6 (512KB) + L1d L#6 (32KB) + L1i L#6 (32KB) + Core L#6
        PU L#12 (P#6)
        PU L#13 (P#22)
      L2 L#7 (512KB) + L1d L#7 (32KB) + L1i L#7 (32KB) + Core L#7
        PU L#14 (P#7)
        PU L#15 (P#23)
    HostBridge L#2
      PCIBridge
        PCI 10de:1e07
          GPU L#2 "card1"
          GPU L#3 "renderD129"
      PCIBridge
        PCI 1022:7901
          Block(Other) L#4 "sr0"
      PCIBridge
        PCI 1022:7901
  NUMANode L#2 (P#2 31GB)
    L3 L#4 (16MB)
      L2 L#8 (512KB) + L1d L#8 (32KB) + L1i L#8 (32KB) + Core L#8
        PU L#16 (P#8)
        PU L#17 (P#24)
      L2 L#9 (512KB) + L1d L#9 (32KB) + L1i L#9 (32KB) + Core L#9
        PU L#18 (P#9)
        PU L#19 (P#25)
    L3 L#5 (16MB)
      L2 L#10 (512KB) + L1d L#10 (32KB) + L1i L#10 (32KB) + Core L#10
        PU L#20 (P#10)
        PU L#21 (P#26)
      L2 L#11 (512KB) + L1d L#11 (32KB) + L1i L#11 (32KB) + Core L#11
        PU L#22 (P#11)
        PU L#23 (P#27)
    HostBridge L#6
      PCIBridge
        PCI 10de:1e02
          GPU L#5 "renderD130"
          GPU L#6 "card2"
      PCIBridge
        PCI 1022:7901
  NUMANode L#3 (P#3 31GB)
    L3 L#6 (16MB)
      L2 L#12 (512KB) + L1d L#12 (32KB) + L1i L#12 (32KB) + Core L#12
        PU L#24 (P#12)
        PU L#25 (P#28)
      L2 L#13 (512KB) + L1d L#13 (32KB) + L1i L#13 (32KB) + Core L#13
        PU L#26 (P#13)
        PU L#27 (P#29)
    L3 L#7 (16MB)
      L2 L#14 (512KB) + L1d L#14 (32KB) + L1i L#14 (32KB) + Core L#14
        PU L#28 (P#14)
        PU L#29 (P#30)
      L2 L#15 (512KB) + L1d L#15 (32KB) + L1i L#15 (32KB) + Core L#15
        PU L#30 (P#15)
        PU L#31 (P#31)
    HostBridge L#9
      PCIBridge
        PCI 14a4:23f1
      PCIBridge
        PCI 8086:1533
          Net L#7 "eth0"
      PCIBridge
        PCI 8086:1533
          Net L#8 "eth1"
      PCIBridge
        PCI 10de:1e02
          GPU L#9 "card3"
          GPU L#10 "renderD131"
[chibi@centos8 ~]$ cat /proc/meminfo
MemTotal:       131618148 kB
MemFree:        122189640 kB
MemAvailable:   129117576 kB
Buffers:             1060 kB
Cached:           7555380 kB
SwapCached:             0 kB
Active:           1286308 kB
Inactive:         6787256 kB
Active(anon):      494480 kB
Inactive(anon):      9756 kB
Active(file):      791828 kB
Inactive(file):   6777500 kB
Unevictable:            0 kB
Mlocked:                0 kB
SwapTotal:              0 kB
SwapFree:               0 kB
Dirty:                 20 kB
Writeback:              0 kB
AnonPages:         502240 kB
Mapped:            300060 kB
Shmem:              12204 kB
KReclaimable:      315248 kB
Slab:              823684 kB
SReclaimable:      315248 kB
SUnreclaim:        508436 kB
KernelStack:        12224 kB
PageTables:         25768 kB
NFS_Unstable:           0 kB
Bounce:                 0 kB
WritebackTmp:           0 kB
CommitLimit:     65809072 kB
Committed_AS:     3188696 kB
VmallocTotal:   34359738367 kB
VmallocUsed:            0 kB
VmallocChunk:           0 kB
Percpu:             20480 kB
HardwareCorrupted:      0 kB
AnonHugePages:     227328 kB
ShmemHugePages:         0 kB
ShmemPmdMapped:         0 kB
HugePages_Total:        0
HugePages_Free:         0
HugePages_Rsvd:         0
HugePages_Surp:         0
Hugepagesize:       2048 kB
Hugetlb:                0 kB
DirectMap4k:      1593036 kB
DirectMap2M:     32874496 kB
DirectMap1G:    100663296 kB
[chibi@centos8 ~]$ free
              total        used        free      shared  buff/cache   available
Mem:      131618148     1556720   122189740       12204     7871688   129117676
Swap:             0           0           0
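free and /proc/meminfo both report kB-sized counters, which are hard to read at a glance for a 128G machine. A small sketch for pulling out MemAvailable in GiB (plain awk, nothing extra assumed; here it works out to roughly 123 GiB):

    awk '/^MemAvailable/ {printf "MemAvailable: %.1f GiB\n", $2/1048576}' /proc/meminfo

free -h prints the same table in human-readable units.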
[chibi@centos8 ~]$ sensors
k10temp-pci-00c3
Adapter: PCI adapter
Tdie:         +31.6°C  (high = +70.0°C)
Tctl:         +31.6°C

[chibi@centos8 ~]$ sudo nvme smart-log /dev/nvme0n1
Smart Log for NVME device:nvme0n1 namespace-id:ffffffff
critical_warning                    : 0
temperature                         : 56 C
available_spare                     : 100%
available_spare_threshold           : 0%
percentage_used                     : 0%
data_units_read                     : 620,110
data_units_written                  : 2,644,809
host_read_commands                  : 12,524,650
host_write_commands                 : 16,847,378
controller_busy_time                : 588
power_cycles                        : 70
power_on_hours                      : 72
unsafe_shutdowns                    : 16
media_errors                        : 0
num_err_log_entries                 : 0
Warning Temperature Time            : 0
Critical Composite Temperature Time : 0
Temperature Sensor 1                : 56 C
Thermal Management T1 Trans Count   : 0
Thermal Management T2 Trans Count   : 0
Thermal Management T1 Total Time    : 0
Thermal Management T2 Total Time    : 0
[chibi@centos8 ~]$ sudo nvme smart-log /dev/nvme0n1
Smart Log for NVME device:nvme0n1 namespace-id:ffffffff
critical_warning                    : 0
temperature                         : 54 C
available_spare                     : 100%
available_spare_threshold           : 0%
percentage_used                     : 0%
data_units_read                     : 620,110
data_units_written                  : 2,644,809
host_read_commands                  : 12,524,650
host_write_commands                 : 16,847,402
controller_busy_time                : 588
power_cycles                        : 70
power_on_hours                      : 72
unsafe_shutdowns                    : 16
media_errors                        : 0
num_err_log_entries                 : 0
Warning Temperature Time            : 0
Critical Composite Temperature Time : 0
Temperature Sensor 1                : 54 C
Thermal Management T1 Trans Count   : 0
Thermal Management T2 Trans Count   : 0
Thermal Management T1 Total Time    : 0
Thermal Management T2 Total Time    : 0
[chibi@centos8 ~]$
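The smart-log was queried twice: between the two readings the composite temperature dropped from 56 C to 54 C while host_write_commands advanced slightly, so the drive was idling down. A minimal sketch for keeping an eye on the NVMe temperature fields during a long run (watch and grep are standard tools; the 60-second interval is an arbitrary choice):

    sudo watch -n 60 'nvme smart-log /dev/nvme0n1 | grep -i temperature'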