[chibi@rhel8 ~]$ sudo nvidia-docker run --rm -ti nvcr.io/nvidia/tensorflow:19.04-py3
Unable to find image 'nvcr.io/nvidia/tensorflow:19.04-py3' locally
19.04-py3: Pulling from nvidia/tensorflow
34667c7e4631: Pulling fs layer
d18d76a881a4: Pulling fs layer
119c7358fbfc: Pulling fs layer
2aaf13f3eff0: Pulling fs layer
2aaf13f3eff0: Waiting
3b700a61ede6: Waiting
87e6ca450d3f: Waiting
a1e76dce1aec: Waiting
9b91fa2f9276: Waiting
b5877a9add73: Pulling fs layer
bab74df105f1: Waiting
534bbf505504: Waiting
4956bf3bbbb9: Waiting
f4371944c97d: Pulling fs layer
4615a735431d: Pulling fs layer
5db2639932b5: Pulling fs layer
629d5c9d75a4: Waiting
8071b94b5429: Pulling fs layer
5db2639932b5: Waiting
e32e86c15b8b: Waiting
f4371944c97d: Waiting
8071b94b5429: Waiting
3498ed8c5685: Waiting
62819d8896c1: Waiting
34bc85bf8bef: Waiting
4a95ca3431c4: Waiting
41bc2d0a4d4d: Waiting
a2ceadc61854: Waiting
2d0c5308ff92: Waiting
a531832992b8: Waiting
b24a8fd8f2e1: Waiting
8d9313624ab7: Waiting
e5cafe011f22: Pull complete
eca19a329cd4: Pull complete
65ee50af0bcc: Pull complete
5f60ec8c32f4: Pull complete
d7dcb657fa13: Pull complete
1f6ef6575fbe: Pull complete
d1ef346a3015: Pull complete
4ef9cb404fd5: Pull complete
f6797f45a018: Pull complete
1d4380527325: Pull complete
965f2629db02: Pull complete
5debff4c8c0a: Pull complete
b3a3a9d82be6: Pull complete
eac05f20b729: Pull complete
3ce0a7f80167: Pull complete
2a21e34a5784: Pull complete
c1ccf19e258e: Pull complete
0b6ea9d0652b: Pull complete
307bc8c3f024: Pull complete
ca75fd593a79: Pull complete
0cd3cdca1af7: Pull complete
48e857e9d372: Pull complete
3264ea403ca9: Pull complete
Digest: sha256:aaebc136d5d50937362675c77afd908bd96cded68846f39163050a023c8a9851
Status: Downloaded newer image for nvcr.io/nvidia/tensorflow:19.04-py3
                                                                                
================
== TensorFlow ==
================

NVIDIA Release 19.04 (build 6132408)
TensorFlow Version 1.13.1

Container image Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
Copyright 2017-2019 The TensorFlow Authors.  All rights reserved.

Various files include modifications (c) NVIDIA CORPORATION.  All rights reserved.
NVIDIA modifications are covered by the license terms that apply to the underlying project or file.

NOTE: MOFED driver for multi-node communication was not detected.
      Multi-node communication performance may be reduced.

NOTE: The SHMEM allocation limit is set to the default of 64MB.  This may be
   insufficient for TensorFlow.  NVIDIA recommends the use of the following flags:
   nvidia-docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 ...

root@9250dfb71258:/workspace# ls
README.md  docker-examples  nvidia-examples
root@9250dfb71258:/workspace# cd nvidia-examples
root@9250dfb71258:/workspace/nvidia-examples# ls
NCF              bert                 cnn           ssdv1.2
OpenSeq2Seq      big_lstm             gnmt_v2       tensorrt
UNet_Industrial  build_imagenet_data  resnet50v1.5
root@9250dfb71258:/workspace/nvidia-examples# cd big_lstm
root@9250dfb71258:/workspace/nvidia-examples/big_lstm# ls
1b_word_vocab.txt  data_utils_test.py         language_model_test.py
README.md          download_1b_words_data.sh  model_utils.py
__init__.py        hparams.py                 run_utils.py
common.py          hparams_test.py            single_lm_train.py
data_utils.py      language_model.py          testdata
root@9250dfb71258:/workspace/nvidia-examples/big_lstm# ./download_1b_words_data.sh
Please specify root of dataset directory: data

Success: dataset root dir validated

--2020-06-11 22:05:06--  http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
Resolving www.statmt.org (www.statmt.org)... 129.215.197.184
Connecting to www.statmt.org (www.statmt.org)|129.215.197.184|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1792209805 (1.7G) [application/x-gzip]
Saving to: ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’

1-billion-word-lang 100%[===================>]   1.67G  1.18MB/s    in 40m 16s

2020-06-11 22:45:23 (724 KB/s) - ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’ saved [1792209805/1792209805]

1-billion-word-language-modeling-benchmark-r13output/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00024-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00057-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00055-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00096-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00081-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00033-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00072-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00082-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00018-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00008-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00059-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00005-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00091-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00062-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00031-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00095-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00076-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00006-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00038-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00015-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00087-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00021-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00049-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00009-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00027-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00056-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00046-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00032-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00029-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00088-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00085-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00011-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00012-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00067-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00003-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00093-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00050-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00053-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00044-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00019-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00066-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00028-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00045-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00039-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00071-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00052-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00078-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00037-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00002-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00014-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00048-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00017-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00004-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00077-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00080-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00020-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00051-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00016-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00079-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00043-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00068-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00099-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00064-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00034-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00054-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00040-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00070-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00063-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00041-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00083-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00061-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00073-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00094-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00030-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00060-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00035-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00023-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00042-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00025-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00090-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00089-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00065-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00075-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00022-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00026-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00098-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00084-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00010-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00069-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00013-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00092-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00036-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00097-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00007-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00074-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00001-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00047-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00086-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00058-of-00100
1-billion-word-language-modeling-benchmark-r13output/.svn/
1-billion-word-language-modeling-benchmark-r13output/.svn/tmp/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/de102cd0c91cd19e6612f0840e68a2f20ba8134c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/deed1b75d3bd5cc36ae6aeb85d56680b892b7948.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/86c58db52fbf362c5bc329afc33b8805085fcb0d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/9f2882e21f860a83ad6ea8898ebab140974ed301.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/bcdbc523ee7488dc438cab869b6d5e236578dbfa.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/d2718bc26d0ee0a213d7d4add99a304cb5b39ede.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/c5b24f61479da923123d0394a188da922ea0359c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/116d6ea61730d8199127596b072e981338597779.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/b0e26559cfe641245584a9400b35ba28d64f1411.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/d3ae508e3bcb0e696dd70aecd052410f1f7afc1d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/9e148bd766e8805e0eb97eeae250433ec7a2e996.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/31b645a482e0b81fda3c567cada307c6fcf7ec80.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/da39a3ee5e6b4b0d3255bfef95601890afd80709.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/c1ed42c415ec884e591fb5c70d373da640a383b5.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/e37ba0f85e94073ccaced1eed7e4f5d737a25f49.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/entries
1-billion-word-language-modeling-benchmark-r13output/.svn/format
1-billion-word-language-modeling-benchmark-r13output/.svn/wc.db
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00015-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00031-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00027-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00010-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00033-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00042-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00046-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00037-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00029-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00013-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00002-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00048-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00006-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00030-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00025-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00039-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00008-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00020-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00001-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00034-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00044-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00045-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00016-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00004-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00035-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00038-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00009-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00024-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00022-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00021-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00032-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00011-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00049-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00041-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00019-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00023-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00040-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00014-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00007-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00017-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00012-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00018-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00003-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00028-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en-00000-of-00100
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00043-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00005-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00036-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00026-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00047-of-00050
1-billion-word-language-modeling-benchmark-r13output/README

Success! One billion words dataset ready at:
data/1-billion-word-language-modeling-benchmark-r13output/
Please pass this dir to single_lm_train.py via the --datadir option.

root@9250dfb71258:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=4 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

*****HYPER PARAMETERS*****
{'num_gpus': 4, 'optimizer': 0, 'max_grad_norm': 10.0, 'num_steps': 20, 'keep_prob': 0.9, 'run_profiler': False, 'batch_size': 128, 'num_shards': 8, 'num_sampled': 8192, 'state_size': 2048, 'emb_size': 512, 'learning_rate': 0.2, 'num_layers': 1, 'projected_size': 512, 'max_time': 180, 'do_summaries': False, 'num_delayed_steps': 150, 'average_params': True, 'vocab_size': 793470}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1591915725.5683463
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_b:0 (793470,) <dtype: 'float32_ref'> /gpu:0
model/global_step:0 () <dtype: 'int32_ref'>
model/model/emb_0/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_b:0 (793470,) <dtype: 'float32_ref'> /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:0
model/model_1/state_1_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:1
model/model_2/state_2_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:2
model/model_3/state_3_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:3
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-06-11 22:48:46.237473: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2900040000 Hz
2020-06-11 22:48:46.243788: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xbd09df0 executing computations on platform Host. Devices:
2020-06-11 22:48:46.243832: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): <undefined>, <undefined>
2020-06-11 22:48:46.703900: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-11 22:48:46.733813: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-11 22:48:46.740400: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-11 22:48:46.741258: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xbd09810 executing computations on platform CUDA. Devices:
2020-06-11 22:48:46.741291: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-06-11 22:48:46.741298: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-06-11 22:48:46.741308: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-11 22:48:46.741315: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-11 22:48:46.742406: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:01:00.0
totalMemory: 23.65GiB freeMemory: 23.27GiB
2020-06-11 22:48:46.742435: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.49GiB
2020-06-11 22:48:46.742457: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4b:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-06-11 22:48:46.742479: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4c:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-06-11 22:48:46.742504: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-06-11 22:48:47.386132: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-06-11 22:48:47.386178: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1 2 3
2020-06-11 22:48:47.386183: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N N N N
2020-06-11 22:48:47.386189: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   N N N N
2020-06-11 22:48:47.386193: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2:   N N N N
2020-06-11 22:48:47.386197: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3:   N N N N
2020-06-11 22:48:47.386340: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22553 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-06-11 22:48:47.386562: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22765 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-06-11 22:48:47.386841: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10231 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5)
2020-06-11 22:48:47.386994: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10231 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4c:00.0, compute capability: 7.5)
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00059-of-00100
Finished processing!
2020-06-11 22:49:06.860477: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1, time = 10.03s, wps = 1021, train loss = 13.0187
Iteration 2, time = 7.90s, wps = 1296, train loss = 12.9609
Iteration 3, time = 0.11s, wps = 96205, train loss = 12.8619
Iteration 4, time = 0.10s, wps = 99959, train loss = 11.4333
Iteration 5, time = 0.10s, wps = 104682, train loss = 12.7135
Iteration 6, time = 0.09s, wps = 108805, train loss = 11.5721
Iteration 7, time = 0.10s, wps = 107661, train loss = 75.0950
Iteration 8, time = 0.10s, wps = 105700, train loss = 26.9775
Iteration 9, time = 0.10s, wps = 104250, train loss = 15.7163
Iteration 20, time = 1.03s, wps = 109565, train loss = 13.9983
Iteration 40, time = 1.87s, wps = 109351, train loss = 11.1481
Iteration 60, time = 1.91s, wps = 106947, train loss = 8.9665
Iteration 80, time = 1.89s, wps = 108098, train loss = 8.4840
Iteration 100, time = 1.90s, wps = 107989, train loss = 7.8867
Iteration 120, time = 1.90s, wps = 107953, train loss = 7.4590
Iteration 140, time = 1.89s, wps = 108483, train loss = 7.1073
Iteration 160, time = 1.89s, wps = 108101, train loss = 6.7072
Iteration 180, time = 1.88s, wps = 108816, train loss = 6.4723
Iteration 200, time = 1.88s, wps = 108811, train loss = 6.3571
Iteration 220, time = 1.89s, wps = 108351, train loss = 6.3816
Iteration 240, time = 1.90s, wps = 107526, train loss = 6.2603
Iteration 260, time = 1.90s, wps = 107529, train loss = 6.1091
Iteration 280, time = 1.88s, wps = 108938, train loss = 6.1168
Iteration 300, time = 1.90s, wps = 107649, train loss = 6.0549
Iteration 320, time = 1.91s, wps = 107315, train loss = 6.0120
Iteration 340, time = 1.90s, wps = 107590, train loss = 5.9381
Iteration 360, time = 1.90s, wps = 107634, train loss = 5.8989
Iteration 380, time = 1.90s, wps = 108022, train loss = 5.8705
Iteration 400, time = 1.90s, wps = 107636, train loss = 5.8749
Iteration 420, time = 1.90s, wps = 108020, train loss = 5.8733
Iteration 440, time = 1.89s, wps = 108483, train loss = 5.7532
Iteration 460, time = 1.90s, wps = 107754, train loss = 5.7384
Iteration 480, time = 1.89s, wps = 108402, train loss = 5.8136
Iteration 500, time = 1.92s, wps = 106680, train loss = 5.6850
Iteration 520, time = 1.89s, wps = 108286, train loss = 5.6766
Iteration 540, time = 1.89s, wps = 108313, train loss = 5.6447
Iteration 560, time = 1.90s, wps = 107924, train loss = 5.5465
Iteration 580, time = 1.90s, wps = 107862, train loss = 5.5616
Iteration 600, time = 1.90s, wps = 107551, train loss = 5.5800
Iteration 620, time = 1.91s, wps = 107273, train loss = 5.5946
Iteration 640, time = 1.91s, wps = 107410, train loss = 5.6388
Iteration 660, time = 1.89s, wps = 108328, train loss = 5.5468
Iteration 680, time = 1.88s, wps = 108850, train loss = 5.5563
Iteration 700, time = 1.91s, wps = 107146, train loss = 5.4604
Iteration 720, time = 1.92s, wps = 106536, train loss = 5.4435
Iteration 740, time = 1.89s, wps = 108121, train loss = 5.3766
Iteration 760, time = 1.87s, wps = 109553, train loss = 5.4060
Iteration 780, time = 1.89s, wps = 108331, train loss = 5.4466
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00043-of-00100
Finished processing!
Iteration 800, time = 3.50s, wps = 58487, train loss = 5.4340
Iteration 820, time = 1.89s, wps = 108402, train loss = 5.4201
Iteration 840, time = 1.92s, wps = 106641, train loss = 5.3494
Iteration 860, time = 1.89s, wps = 108564, train loss = 5.3249
Iteration 880, time = 1.90s, wps = 107940, train loss = 5.3376
Iteration 900, time = 1.88s, wps = 108717, train loss = 5.3677
Iteration 920, time = 1.89s, wps = 108371, train loss = 5.3094
Iteration 940, time = 1.90s, wps = 107550, train loss = 5.2512
Iteration 960, time = 1.89s, wps = 108586, train loss = 5.2754
Iteration 980, time = 1.89s, wps = 108505, train loss = 5.2144
Iteration 1000, time = 1.90s, wps = 107868, train loss = 5.2696
Iteration 1020, time = 1.88s, wps = 108793, train loss = 5.1913
Iteration 1040, time = 1.91s, wps = 107472, train loss = 5.1748
Iteration 1060, time = 1.90s, wps = 107650, train loss = 5.2080
Iteration 1080, time = 1.87s, wps = 109230, train loss = 5.1542
Iteration 1100, time = 1.89s, wps = 108174, train loss = 5.1616
Iteration 1120, time = 1.89s, wps = 108470, train loss = 5.1663
Iteration 1140, time = 1.91s, wps = 107000, train loss = 5.1499
Iteration 1160, time = 1.89s, wps = 108571, train loss = 5.1479
Iteration 1180, time = 1.89s, wps = 108200, train loss = 5.1335
Iteration 1200, time = 1.91s, wps = 107396, train loss = 5.1495
Iteration 1220, time = 1.88s, wps = 108942, train loss = 5.0894
Iteration 1240, time = 1.89s, wps = 108388, train loss = 5.1275
Iteration 1260, time = 1.90s, wps = 107676, train loss = 5.0776
Iteration 1280, time = 1.89s, wps = 108570, train loss = 5.0867
Iteration 1300, time = 1.91s, wps = 107468, train loss = 5.1187
Iteration 1320, time = 1.90s, wps = 107736, train loss = 5.0020
Iteration 1340, time = 1.91s, wps = 107230, train loss = 5.0417
Iteration 1360, time = 1.91s, wps = 107415, train loss = 5.0195
Iteration 1380, time = 1.90s, wps = 107592, train loss = 5.0306
Iteration 1400, time = 1.90s, wps = 107941, train loss = 5.0507
Iteration 1420, time = 1.91s, wps = 107410, train loss = 4.9524
Iteration 1440, time = 1.90s, wps = 107639, train loss = 4.9743
Iteration 1460, time = 1.91s, wps = 107155, train loss = 4.9278
Iteration 1480, time = 1.91s, wps = 107442, train loss = 4.9774
Iteration 1500, time = 1.93s, wps = 106207, train loss = 4.9952
Iteration 1520, time = 1.89s, wps = 108435, train loss = 4.9205
Iteration 1540, time = 1.91s, wps = 107082, train loss = 4.9401
Iteration 1560, time = 1.90s, wps = 107656, train loss = 4.9530
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m13.706s
user    23m46.622s
sys     4m56.829s
root@9250dfb71258:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=3 --datadir=./data/1-billion-word-
language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

*****HYPER PARAMETERS*****
{'num_sampled': 8192, 'keep_prob': 0.9, 'num_delayed_steps': 150, 'vocab_size': 793470, 'emb_size': 512, 'batch_size': 128, 'run_profiler': False, 'state_size': 2048, 'num_layers': 1, 'max_time': 180, 'optimizer': 0, 'do_summaries': False, 'num_gpus': 3, 'num_shards': 8, 'average_params': True, 'max_grad_norm': 10.0, 'projected_size': 512, 'learning_rate': 0.2, 'num_steps': 20}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1591917654.3401113
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_b:0 (793470,) <dtype: 'float32_ref'> /gpu:0
model/global_step:0 () <dtype: 'int32_ref'>
model/model/emb_0/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_b:0 (793470,) <dtype: 'float32_ref'> /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:0
model/model_1/state_1_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:1
model/model_2/state_2_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:2
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-06-11 23:20:54.881470: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2900040000 Hz
2020-06-11 23:20:54.887851: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xa5e3150 executing computations on platform Host. Devices:
2020-06-11 23:20:54.887896: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): <undefined>, <undefined>
2020-06-11 23:20:55.404263: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-11 23:20:55.409096: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-11 23:20:55.415835: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-11 23:20:55.417023: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xa5e2b70 executing computations on platform CUDA. Devices:
2020-06-11 23:20:55.417050: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-06-11 23:20:55.417056: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-06-11 23:20:55.417061: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-11 23:20:55.417069: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-11 23:20:55.418135: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:01:00.0
totalMemory: 23.65GiB freeMemory: 23.27GiB
2020-06-11 23:20:55.418165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.49GiB
2020-06-11 23:20:55.418189: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4b:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-06-11 23:20:55.418212: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4c:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-06-11 23:20:55.418236: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-06-11 23:20:56.056258: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-06-11 23:20:56.056302: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1 2 3
2020-06-11 23:20:56.056307: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N N N N
2020-06-11 23:20:56.056312: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   N N N N
2020-06-11 23:20:56.056317: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2:   N N N N
2020-06-11 23:20:56.056321: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3:   N N N N
2020-06-11 23:20:56.056456: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22553 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-06-11 23:20:56.056798: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22765 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-06-11 23:20:56.056945: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10231 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5)
2020-06-11 23:20:56.057211: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10231 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4c:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00099-of-00100
Finished processing!
2020-06-11 23:21:08.699314: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1567, time = 7.90s, wps = 973, train loss = 5.4500
Iteration 1568, time = 5.94s, wps = 1293, train loss = 4.9636
Iteration 1569, time = 0.09s, wps = 89117, train loss = 5.0042
Iteration 1570, time = 0.08s, wps = 98176, train loss = 5.0002
Iteration 1571, time = 0.08s, wps = 94367, train loss = 4.9683
Iteration 1572, time = 0.08s, wps = 100457, train loss = 4.9878
Iteration 1573, time = 0.08s, wps = 99576, train loss = 4.9419
Iteration 1574, time = 0.07s, wps = 102649, train loss = 4.9283
Iteration 1575, time = 0.08s, wps = 93450, train loss = 4.9711
Iteration 1586, time = 0.84s, wps = 100990, train loss = 4.8600
Iteration 1606, time = 1.51s, wps = 101496, train loss = 4.9082
Iteration 1626, time = 1.51s, wps = 101423, train loss = 4.8997
Iteration 1646, time = 1.53s, wps = 100688, train loss = 4.8946
Iteration 1666, time = 1.52s, wps = 100901, train loss = 4.9071
Iteration 1686, time = 1.51s, wps = 101495, train loss = 4.8291
Iteration 1706, time = 1.53s, wps = 100617, train loss = 4.8997
Iteration 1726, time = 1.52s, wps = 100915, train loss = 4.8400
Iteration 1746, time = 1.54s, wps = 100065, train loss = 4.8274
Iteration 1766, time = 1.51s, wps = 101560, train loss = 4.8086
Iteration 1786, time = 1.52s, wps = 101311, train loss = 4.8746
Iteration 1806, time = 1.52s, wps = 101011, train loss = 4.8863
Iteration 1826, time = 1.52s, wps = 100878, train loss = 4.8420
Iteration 1846, time = 1.51s, wps = 101443, train loss = 4.8610
Iteration 1866, time = 1.51s, wps = 101497, train loss = 4.8403
Iteration 1886, time = 1.51s, wps = 101414, train loss = 4.7856
Iteration 1906, time = 1.52s, wps = 101330, train loss = 4.7979
Iteration 1926, time = 1.53s, wps = 100440, train loss = 4.8718
Iteration 1946, time = 1.51s, wps = 101994, train loss = 4.7311
Iteration 1966, time = 1.53s, wps = 100533, train loss = 4.8058
Iteration 1986, time = 1.51s, wps = 101611, train loss = 4.7674
Iteration 2006, time = 1.53s, wps = 100247, train loss = 4.7912
Iteration 2026, time = 1.53s, wps = 100682, train loss = 4.7796
Iteration 2046, time = 1.53s, wps = 100074, train loss = 4.7998
Iteration 2066, time = 1.52s, wps = 101382, train loss = 4.8444
Iteration 2086, time = 1.53s, wps = 100072, train loss = 4.7452
Iteration 2106, time = 1.53s, wps = 100376, train loss = 4.8278
Iteration 2126, time = 1.51s, wps = 102016, train loss = 4.8060
Iteration 2146, time = 1.52s, wps = 101060, train loss = 4.7528
Iteration 2166, time = 1.52s, wps = 100751, train loss = 4.8000
Iteration 2186, time = 1.53s, wps = 100622, train loss = 4.8276
Iteration 2206, time = 1.54s, wps = 99958, train loss = 4.7822
Iteration 2226, time = 1.52s, wps = 100811, train loss = 4.7177
Iteration 2246, time = 1.53s, wps = 100458, train loss = 4.7473
Iteration 2266, time = 1.53s, wps = 100420, train loss = 4.7337
Iteration 2286, time = 1.52s, wps = 101111, train loss = 4.7085
Iteration 2306, time = 1.53s, wps = 100513, train loss = 4.7047
Iteration 2326, time = 1.52s, wps = 100828, train loss = 4.7442
Iteration 2346, time = 1.51s, wps = 101589, train loss = 4.7330
Iteration 2366, time = 1.51s, wps = 101869, train loss = 4.7208
Iteration 2386, time = 1.52s, wps = 100961, train loss = 4.7399
Iteration 2406, time = 1.52s, wps = 100910, train loss = 4.6528
Iteration 2426, time = 1.53s, wps = 100638, train loss = 4.6887
Iteration 2446, time = 1.52s, wps = 101103, train loss = 4.6762
Iteration 2466, time = 1.52s, wps = 100741, train loss = 4.7733
Iteration 2486, time = 1.53s, wps = 100477, train loss = 4.6537
Iteration 2506, time = 1.53s, wps = 100473, train loss = 4.6603
Iteration 2526, time = 1.53s, wps = 100502, train loss = 4.7065
Iteration 2546, time = 1.53s, wps = 100513, train loss = 4.7005
Iteration 2566, time = 1.53s, wps = 100576, train loss = 4.7490
Iteration 2586, time = 1.53s, wps = 100561, train loss = 4.7308
Iteration 2606, time = 1.54s, wps = 99770, train loss = 4.6832
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00077-of-00100
Finished processing!
Iteration 2626, time = 3.11s, wps = 49329, train loss = 4.7362
Iteration 2646, time = 1.53s, wps = 100613, train loss = 4.6950
Iteration 2666, time = 1.53s, wps = 100688, train loss = 4.6934
Iteration 2686, time = 1.54s, wps = 100003, train loss = 4.7069
Iteration 2706, time = 1.53s, wps = 100178, train loss = 4.6481
Iteration 2726, time = 1.52s, wps = 100762, train loss = 4.5413
Iteration 2746, time = 1.54s, wps = 99970, train loss = 4.6676
Iteration 2766, time = 1.52s, wps = 100877, train loss = 4.6493
Iteration 2786, time = 1.52s, wps = 101095, train loss = 4.6219
Iteration 2806, time = 1.53s, wps = 100098, train loss = 4.6605
Iteration 2826, time = 1.55s, wps = 99380, train loss = 4.6891
Iteration 2846, time = 1.52s, wps = 100762, train loss = 4.7048
Iteration 2866, time = 1.53s, wps = 100110, train loss = 4.5950
Iteration 2886, time = 1.53s, wps = 100288, train loss = 4.5821
Iteration 2906, time = 1.53s, wps = 100168, train loss = 4.6452
Iteration 2926, time = 1.53s, wps = 100340, train loss = 4.6208
Iteration 2946, time = 1.52s, wps = 101072, train loss = 4.6263
Iteration 2966, time = 1.53s, wps = 100256, train loss = 4.6903
Iteration 2986, time = 1.54s, wps = 99846, train loss = 4.6429
Iteration 3006, time = 1.53s, wps = 100069, train loss = 4.6349
Iteration 3026, time = 1.53s, wps = 100406, train loss = 4.6225
Iteration 3046, time = 1.54s, wps = 99718, train loss = 4.5904
Iteration 3066, time = 1.52s, wps = 100778, train loss = 4.6094
Iteration 3086, time = 1.52s, wps = 101081, train loss = 4.5093
Iteration 3106, time = 1.53s, wps = 100677, train loss = 4.6076
Iteration 3126, time = 1.53s, wps = 100415, train loss = 4.6532
Iteration 3146, time = 1.53s, wps = 100339, train loss = 4.6812
Iteration 3166, time = 1.53s, wps = 100533, train loss = 4.5064
Iteration 3186, time = 1.53s, wps = 100586, train loss = 4.6291
Iteration 3206, time = 1.53s, wps = 100498, train loss = 4.5851
Iteration 3226, time = 1.53s, wps = 100525, train loss = 4.6345
Iteration 3246, time = 1.53s, wps = 100669, train loss = 4.6662
Iteration 3266, time = 1.53s, wps = 100122, train loss = 4.5305
Iteration 3286, time = 1.53s, wps = 100488, train loss = 4.5773
Iteration 3306, time = 1.53s, wps = 100690, train loss = 4.5092
Iteration 3326, time = 1.53s, wps = 100579, train loss = 4.5225
Iteration 3346, time = 1.53s, wps = 100252, train loss = 4.5458
Iteration 3366, time = 1.54s, wps = 99495, train loss = 4.5312
Iteration 3386, time = 1.54s, wps = 99764, train loss = 4.5460
Iteration 3406, time = 1.53s, wps = 100615, train loss = 4.5960
Iteration 3426, time = 1.54s, wps = 99994, train loss = 4.5042
Iteration 3446, time = 1.54s, wps = 99806, train loss = 4.5834
Iteration 3466, time = 1.53s, wps = 100585, train loss = 4.5186
Iteration 3486, time = 1.53s, wps = 100347, train loss = 4.5138
Iteration 3506, time = 1.54s, wps = 99453, train loss = 4.5033
Iteration 3526, time = 1.54s, wps = 99902, train loss = 4.5139
Iteration 3546, time = 1.54s, wps = 100024, train loss = 4.5608
Iteration 3566, time = 1.52s, wps = 100762, train loss = 4.5414
Iteration 3586, time = 1.53s, wps = 100314, train loss = 4.5411
Iteration 3606, time = 1.54s, wps = 99862, train loss = 4.5724
Iteration 3626, time = 1.52s, wps = 101194, train loss = 4.4999
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m12.161s
user    20m48.164s
sys     4m41.121s
root@9250dfb71258:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-
language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

*****HYPER PARAMETERS*****
{'optimizer': 0, 'average_params': True, 'run_profiler': False, 'max_time': 180, 'learning_rate': 0.2, 'num_gpus': 2, 'emb_size': 512, 'keep_prob': 0.9, 'num_layers': 1, 'state_size': 2048, 'num_shards': 8, 'num_delayed_steps': 150, 'vocab_size': 793470, 'max_grad_norm': 10.0, 'do_summaries': False, 'num_sampled': 8192, 'num_steps': 20, 'projected_size': 512, 'batch_size': 128}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1591919239.040582
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_b:0 (793470,) <dtype: 'float32_ref'> /gpu:0
model/global_step:0 () <dtype: 'int32_ref'>
model/model/emb_0/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_b:0 (793470,) <dtype: 'float32_ref'> /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:0
model/model_1/state_1_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-06-11 23:47:19.454455: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2900040000 Hz
2020-06-11 23:47:19.461005: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x928f920 executing computations on platform Host. Devices:
2020-06-11 23:47:19.461053: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): <undefined>, <undefined>
2020-06-11 23:47:19.877424: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-11 23:47:19.909048: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-11 23:47:19.916293: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-11 23:47:19.917196: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x928f340 executing computations on platform CUDA. Devices:
2020-06-11 23:47:19.917226: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-06-11 23:47:19.917233: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-06-11 23:47:19.917240: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-11 23:47:19.917247: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-11 23:47:19.918287: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:01:00.0
totalMemory: 23.65GiB freeMemory: 23.27GiB
2020-06-11 23:47:19.918316: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.49GiB
2020-06-11 23:47:19.918343: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4b:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-06-11 23:47:19.918366: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4c:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-06-11 23:47:19.918389: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-06-11 23:47:20.556646: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-06-11 23:47:20.556687: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1 2 3
2020-06-11 23:47:20.556693: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N N N N
2020-06-11 23:47:20.556696: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   N N N N
2020-06-11 23:47:20.556701: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2:   N N N N
2020-06-11 23:47:20.556705: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3:   N N N N
2020-06-11 23:47:20.556853: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22553 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-06-11 23:47:20.557207: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22765 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-06-11 23:47:20.557368: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10231 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5)
2020-06-11 23:47:20.557626: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10231 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4c:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00099-of-00100
Finished processing!
2020-06-11 23:47:29.960411: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 3633, time = 5.43s, wps = 942, train loss = 5.3218
Iteration 3634, time = 3.52s, wps = 1456, train loss = 4.5218
Iteration 3635, time = 0.07s, wps = 76506, train loss = 4.4543
Iteration 3636, time = 0.06s, wps = 81301, train loss = 4.5193
Iteration 3637, time = 0.07s, wps = 77605, train loss = 4.4796
Iteration 3638, time = 0.06s, wps = 82943, train loss = 4.4263
Iteration 3639, time = 0.06s, wps = 88878, train loss = 4.4858
Iteration 3640, time = 0.06s, wps = 85276, train loss = 4.4822
Iteration 3641, time = 0.06s, wps = 86317, train loss = 4.4999
Iteration 3652, time = 0.64s, wps = 87830, train loss = 4.4397
Iteration 3672, time = 1.17s, wps = 87178, train loss = 4.4468
Iteration 3692, time = 1.17s, wps = 87873, train loss = 4.4718
Iteration 3712, time = 1.18s, wps = 86909, train loss = 4.4657
Iteration 3732, time = 1.16s, wps = 88320, train loss = 4.4658
Iteration 3752, time = 1.18s, wps = 86664, train loss = 4.4308
Iteration 3772, time = 1.16s, wps = 88596, train loss = 4.4679
Iteration 3792, time = 1.17s, wps = 87556, train loss = 4.3876
Iteration 3812, time = 1.16s, wps = 88143, train loss = 4.4164
Iteration 3832, time = 1.17s, wps = 87789, train loss = 4.4414
Iteration 3852, time = 1.16s, wps = 88630, train loss = 4.3995
Iteration 3872, time = 1.17s, wps = 87828, train loss = 4.4165
Iteration 3892, time = 1.16s, wps = 88361, train loss = 4.4241
Iteration 3912, time = 1.17s, wps = 87547, train loss = 4.3738
Iteration 3932, time = 1.16s, wps = 88371, train loss = 4.3819
Iteration 3952, time = 1.16s, wps = 88290, train loss = 4.4188
Iteration 3972, time = 1.17s, wps = 87619, train loss = 4.4508
Iteration 3992, time = 1.15s, wps = 89143, train loss = 4.4649
Iteration 4012, time = 1.17s, wps = 87874, train loss = 4.5103
Iteration 4032, time = 1.18s, wps = 87024, train loss = 4.4220
Iteration 4052, time = 1.16s, wps = 88187, train loss = 4.4812
Iteration 4072, time = 1.17s, wps = 87577, train loss = 4.4990
Iteration 4092, time = 1.15s, wps = 88923, train loss = 4.3992
Iteration 4112, time = 1.17s, wps = 87388, train loss = 4.4240
Iteration 4132, time = 1.17s, wps = 87186, train loss = 4.3412
Iteration 4152, time = 1.16s, wps = 88463, train loss = 4.3747
Iteration 4172, time = 1.17s, wps = 87458, train loss = 4.4725
Iteration 4192, time = 1.16s, wps = 88306, train loss = 4.4510
Iteration 4212, time = 1.16s, wps = 88407, train loss = 4.4123
Iteration 4232, time = 1.16s, wps = 88412, train loss = 4.4907
Iteration 4252, time = 1.16s, wps = 88263, train loss = 4.3921
Iteration 4272, time = 1.16s, wps = 87953, train loss = 4.4307
Iteration 4292, time = 1.16s, wps = 88219, train loss = 4.3735
Iteration 4312, time = 1.17s, wps = 87399, train loss = 4.4929
Iteration 4332, time = 1.16s, wps = 88057, train loss = 4.3690
Iteration 4352, time = 1.17s, wps = 87510, train loss = 4.4470
Iteration 4372, time = 1.17s, wps = 87425, train loss = 4.4029
Iteration 4392, time = 1.16s, wps = 88287, train loss = 4.3780
Iteration 4412, time = 1.18s, wps = 86747, train loss = 4.3776
Iteration 4432, time = 1.16s, wps = 88212, train loss = 4.3576
Iteration 4452, time = 1.17s, wps = 87745, train loss = 4.4684
Iteration 4472, time = 1.17s, wps = 87165, train loss = 4.3438
Iteration 4492, time = 1.16s, wps = 88237, train loss = 4.4295
Iteration 4512, time = 1.17s, wps = 87510, train loss = 4.4195
Iteration 4532, time = 1.17s, wps = 87363, train loss = 4.4145
Iteration 4552, time = 1.17s, wps = 87164, train loss = 4.4766
Iteration 4572, time = 1.17s, wps = 87598, train loss = 4.3692
Iteration 4592, time = 1.17s, wps = 87266, train loss = 4.3962
Iteration 4612, time = 1.16s, wps = 88118, train loss = 4.3689
Iteration 4632, time = 1.17s, wps = 87318, train loss = 4.4010
Iteration 4652, time = 1.17s, wps = 87701, train loss = 4.3236
Iteration 4672, time = 1.16s, wps = 88069, train loss = 4.3073
Iteration 4692, time = 1.17s, wps = 87670, train loss = 4.3786
Iteration 4712, time = 1.17s, wps = 87208, train loss = 4.4735
Iteration 4732, time = 1.16s, wps = 87918, train loss = 4.2960
Iteration 4752, time = 1.16s, wps = 88097, train loss = 4.4505
Iteration 4772, time = 1.19s, wps = 86024, train loss = 4.3376
Iteration 4792, time = 1.17s, wps = 87401, train loss = 4.3417
Iteration 4812, time = 1.17s, wps = 87635, train loss = 4.3006
Iteration 4832, time = 1.16s, wps = 88193, train loss = 4.3238
Iteration 4852, time = 1.17s, wps = 87193, train loss = 4.3903
Iteration 4872, time = 1.17s, wps = 87588, train loss = 4.3717
Iteration 4892, time = 1.17s, wps = 87743, train loss = 4.3417
Iteration 4912, time = 1.17s, wps = 87727, train loss = 4.3753
Iteration 4932, time = 1.16s, wps = 87933, train loss = 4.3888
Iteration 4952, time = 1.17s, wps = 87798, train loss = 4.3109
Iteration 4972, time = 1.17s, wps = 87882, train loss = 4.4329
Iteration 4992, time = 1.17s, wps = 87465, train loss = 4.3891
Iteration 5012, time = 1.18s, wps = 87103, train loss = 4.3750
Iteration 5032, time = 1.17s, wps = 87847, train loss = 4.3867
Iteration 5052, time = 1.16s, wps = 88541, train loss = 4.3508
Iteration 5072, time = 1.16s, wps = 87933, train loss = 4.3177
Iteration 5092, time = 1.19s, wps = 86136, train loss = 4.4170
Iteration 5112, time = 1.16s, wps = 87966, train loss = 4.3516
Iteration 5132, time = 1.17s, wps = 87502, train loss = 4.3560
Iteration 5152, time = 1.18s, wps = 87083, train loss = 4.3139
Iteration 5172, time = 1.18s, wps = 86777, train loss = 4.3637
Iteration 5192, time = 1.17s, wps = 87214, train loss = 4.3441
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00046-of-00100
Finished processing!
Iteration 5212, time = 2.78s, wps = 36797, train loss = 4.4345
Iteration 5232, time = 1.18s, wps = 86878, train loss = 4.3803
Iteration 5252, time = 1.18s, wps = 87020, train loss = 4.3724
Iteration 5272, time = 1.16s, wps = 88288, train loss = 4.4083
Iteration 5292, time = 1.18s, wps = 86902, train loss = 4.3861
Iteration 5312, time = 1.17s, wps = 87217, train loss = 4.3815
Iteration 5332, time = 1.17s, wps = 87548, train loss = 4.4238
Iteration 5352, time = 1.18s, wps = 86452, train loss = 4.4719
Iteration 5372, time = 1.17s, wps = 87657, train loss = 4.3843
Iteration 5392, time = 1.17s, wps = 87430, train loss = 4.3560
Iteration 5412, time = 1.18s, wps = 87147, train loss = 4.4791
Iteration 5432, time = 1.18s, wps = 86708, train loss = 4.3172
Iteration 5452, time = 1.17s, wps = 87234, train loss = 4.4197
Iteration 5472, time = 1.18s, wps = 86414, train loss = 4.3525
Iteration 5492, time = 1.17s, wps = 87250, train loss = 4.3639
Iteration 5512, time = 1.17s, wps = 87349, train loss = 4.3776
Iteration 5532, time = 1.17s, wps = 87515, train loss = 4.3944
Iteration 5552, time = 1.18s, wps = 87111, train loss = 4.3199
Iteration 5572, time = 1.19s, wps = 86073, train loss = 4.3728
Iteration 5592, time = 1.17s, wps = 87236, train loss = 4.3820
Iteration 5612, time = 1.18s, wps = 87013, train loss = 4.4530
Iteration 5632, time = 1.17s, wps = 87708, train loss = 4.3511
Iteration 5652, time = 1.19s, wps = 85900, train loss = 4.4410
Iteration 5672, time = 1.17s, wps = 87740, train loss = 4.3619
Iteration 5692, time = 1.17s, wps = 87219, train loss = 4.4182
Iteration 5712, time = 1.17s, wps = 87634, train loss = 4.4157
Iteration 5732, time = 1.17s, wps = 87847, train loss = 4.3030
Iteration 5752, time = 1.18s, wps = 87015, train loss = 4.4283
Iteration 5772, time = 1.18s, wps = 86918, train loss = 4.3432
Iteration 5792, time = 1.17s, wps = 87614, train loss = 4.3359
Iteration 5812, time = 1.19s, wps = 86380, train loss = 4.3904
Iteration 5832, time = 1.18s, wps = 86730, train loss = 4.3963
Iteration 5852, time = 1.17s, wps = 87277, train loss = 4.4378
Iteration 5872, time = 1.16s, wps = 88181, train loss = 4.3805
Iteration 5892, time = 1.17s, wps = 87353, train loss = 4.4401
Iteration 5912, time = 1.18s, wps = 87096, train loss = 4.2976
Iteration 5932, time = 1.19s, wps = 86308, train loss = 4.3596
Iteration 5952, time = 1.17s, wps = 87322, train loss = 4.3362
Iteration 5972, time = 1.19s, wps = 86222, train loss = 4.3635
Iteration 5992, time = 1.20s, wps = 85542, train loss = 4.4001
Iteration 6012, time = 1.19s, wps = 85709, train loss = 4.3554
Iteration 6032, time = 1.18s, wps = 86796, train loss = 4.3270
Iteration 6052, time = 1.18s, wps = 86429, train loss = 4.3219
Iteration 6072, time = 1.18s, wps = 86435, train loss = 4.2963
Iteration 6092, time = 1.20s, wps = 85397, train loss = 4.4168
Iteration 6112, time = 1.19s, wps = 85914, train loss = 4.2935
Iteration 6132, time = 1.17s, wps = 87298, train loss = 4.3039
Iteration 6152, time = 1.19s, wps = 85886, train loss = 4.4055
Iteration 6172, time = 1.20s, wps = 85234, train loss = 4.3489
Iteration 6192, time = 1.19s, wps = 86119, train loss = 4.4139
Iteration 6212, time = 1.18s, wps = 86446, train loss = 4.3522
Iteration 6232, time = 1.20s, wps = 85355, train loss = 4.3114
Iteration 6252, time = 1.20s, wps = 85460, train loss = 4.4030
Iteration 6272, time = 1.20s, wps = 85089, train loss = 4.2644
Iteration 6292, time = 1.22s, wps = 83952, train loss = 4.3742
Iteration 6312, time = 1.22s, wps = 83796, train loss = 4.3701
Iteration 6332, time = 1.21s, wps = 84627, train loss = 4.4033
Iteration 6352, time = 1.21s, wps = 84801, train loss = 4.3580
Iteration 6372, time = 1.22s, wps = 83905, train loss = 4.3884
Iteration 6392, time = 1.20s, wps = 85230, train loss = 4.3341
Iteration 6412, time = 1.21s, wps = 84365, train loss = 4.3847
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m10.462s
user    16m31.091s
sys     4m23.321s
root@9250dfb71258:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=1 --datadir=./data/1-billion-word-
language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

*****HYPER PARAMETERS*****
{'do_summaries': False, 'emb_size': 512, 'max_time': 180, 'num_sampled': 8192, 'batch_size': 128, 'num_steps': 20, 'optimizer': 0, 'vocab_size': 793470, 'num_delayed_steps': 150, 'num_layers': 1, 'run_profiler': False, 'keep_prob': 0.9, 'average_params': True, 'learning_rate': 0.2, 'max_grad_norm': 10.0, 'projected_size': 512, 'state_size': 2048, 'num_gpus': 1, 'num_shards': 8}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1591920596.805507
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_b:0 (793470,) <dtype: 'float32_ref'> /gpu:0
model/global_step:0 () <dtype: 'int32_ref'>
model/model/emb_0/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/emb_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) <dtype: 'float32_ref'> /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_0:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_1:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_2:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_3:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_4:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_5:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_6:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_w_7:0 (99184, 512) <dtype: 'float32_ref'> /gpu:0
model/softmax_b:0 (793470,) <dtype: 'float32_ref'> /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) <dtype: 'float32_ref'> /gpu:0
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-06-12 00:09:57.008475: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2900040000 Hz
2020-06-12 00:09:57.014969: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x751e4c0 executing computations on platform Host. Devices:
2020-06-12 00:09:57.015012: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): <undefined>, <undefined>
2020-06-12 00:09:57.432288: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-12 00:09:57.467185: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-12 00:09:57.474153: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-12 00:09:57.475058: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x751dee0 executing computations on platform CUDA. Devices:
2020-06-12 00:09:57.475088: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-06-12 00:09:57.475094: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-06-12 00:09:57.475099: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-12 00:09:57.475105: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-12 00:09:57.476171: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:01:00.0
totalMemory: 23.65GiB freeMemory: 23.27GiB
2020-06-12 00:09:57.476199: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.49GiB
2020-06-12 00:09:57.476222: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4b:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-06-12 00:09:57.476244: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4c:00.0
totalMemory: 10.76GiB freeMemory: 10.61GiB
2020-06-12 00:09:57.476268: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-06-12 00:09:58.117742: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-06-12 00:09:58.117787: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1 2 3
2020-06-12 00:09:58.117792: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N N N N
2020-06-12 00:09:58.117795: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   N N N N
2020-06-12 00:09:58.117800: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2:   N N N N
2020-06-12 00:09:58.117805: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3:   N N N N
2020-06-12 00:09:58.117951: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22553 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-06-12 00:09:58.118217: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22765 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-06-12 00:09:58.118677: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10231 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5)
2020-06-12 00:09:58.119118: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10231 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4c:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00094-of-00100
Finished processing!
2020-06-12 00:10:05.005967: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 6420, time = 3.58s, wps = 715, train loss = 4.6732
Iteration 6421, time = 1.76s, wps = 1457, train loss = 4.5134
Iteration 6422, time = 0.06s, wps = 45775, train loss = 4.3275
Iteration 6423, time = 0.06s, wps = 46051, train loss = 4.2018
Iteration 6424, time = 0.05s, wps = 51006, train loss = 4.2790
Iteration 6425, time = 0.05s, wps = 53487, train loss = 4.3072
Iteration 6426, time = 0.05s, wps = 54125, train loss = 4.3704
Iteration 6427, time = 0.04s, wps = 60047, train loss = 4.2340
Iteration 6428, time = 0.04s, wps = 57157, train loss = 4.3125
Iteration 6439, time = 0.52s, wps = 54129, train loss = 4.4196
Iteration 6459, time = 0.92s, wps = 55604, train loss = 4.4737
Iteration 6479, time = 0.91s, wps = 56000, train loss = 4.4555
Iteration 6499, time = 0.93s, wps = 55239, train loss = 4.3579
Iteration 6519, time = 0.93s, wps = 54902, train loss = 4.3775
Iteration 6539, time = 0.93s, wps = 54913, train loss = 4.3127
Iteration 6559, time = 0.93s, wps = 55056, train loss = 4.3644
Iteration 6579, time = 0.92s, wps = 55480, train loss = 4.2610
Iteration 6599, time = 0.93s, wps = 55318, train loss = 4.1443
Iteration 6619, time = 0.93s, wps = 54922, train loss = 4.3951
Iteration 6639, time = 0.92s, wps = 55394, train loss = 4.3569
Iteration 6659, time = 0.93s, wps = 55172, train loss = 4.3835
Iteration 6679, time = 0.92s, wps = 55804, train loss = 4.3622
Iteration 6699, time = 0.93s, wps = 55335, train loss = 4.3652
Iteration 6719, time = 0.94s, wps = 54637, train loss = 4.4536
Iteration 6739, time = 0.93s, wps = 54838, train loss = 4.3052
Iteration 6759, time = 0.94s, wps = 54323, train loss = 4.3190
Iteration 6779, time = 0.93s, wps = 54896, train loss = 4.4289
Iteration 6799, time = 0.94s, wps = 54528, train loss = 4.2866
Iteration 6819, time = 0.96s, wps = 53391, train loss = 4.3156
Iteration 6839, time = 0.93s, wps = 54779, train loss = 4.4146
Iteration 6859, time = 0.94s, wps = 54652, train loss = 4.3212
Iteration 6879, time = 0.95s, wps = 54111, train loss = 4.2341
Iteration 6899, time = 0.92s, wps = 55377, train loss = 4.3867
Iteration 6919, time = 0.95s, wps = 53700, train loss = 4.3489
Iteration 6939, time = 0.93s, wps = 55166, train loss = 4.2924
Iteration 6959, time = 0.94s, wps = 54282, train loss = 4.2119
Iteration 6979, time = 0.93s, wps = 55190, train loss = 4.4586
Iteration 6999, time = 0.93s, wps = 54789, train loss = 4.2961
Iteration 7019, time = 0.93s, wps = 55081, train loss = 4.1920
Iteration 7039, time = 0.95s, wps = 54060, train loss = 4.3230
Iteration 7059, time = 0.94s, wps = 54452, train loss = 4.2924
Iteration 7079, time = 0.93s, wps = 54764, train loss = 4.3353
Iteration 7099, time = 0.93s, wps = 55247, train loss = 4.2599
Iteration 7119, time = 0.93s, wps = 55321, train loss = 4.3828
Iteration 7139, time = 0.93s, wps = 55188, train loss = 4.3605
Iteration 7159, time = 0.92s, wps = 55461, train loss = 4.3803
Iteration 7179, time = 0.93s, wps = 55297, train loss = 4.3211
Iteration 7199, time = 0.95s, wps = 53882, train loss = 4.4194
Iteration 7219, time = 0.94s, wps = 54668, train loss = 4.4020
Iteration 7239, time = 0.94s, wps = 54627, train loss = 4.1443
Iteration 7259, time = 0.95s, wps = 54083, train loss = 4.4323
Iteration 7279, time = 0.93s, wps = 54779, train loss = 4.3823
Iteration 7299, time = 0.94s, wps = 54316, train loss = 4.3621
Iteration 7319, time = 0.94s, wps = 54665, train loss = 4.2849
Iteration 7339, time = 0.93s, wps = 54793, train loss = 4.3770
Iteration 7359, time = 0.95s, wps = 54036, train loss = 4.3102
Iteration 7379, time = 0.94s, wps = 54669, train loss = 4.4176
Iteration 7399, time = 0.94s, wps = 54208, train loss = 4.4660
Iteration 7419, time = 0.94s, wps = 54543, train loss = 4.3857
Iteration 7439, time = 0.95s, wps = 53834, train loss = 4.2745
Iteration 7459, time = 0.95s, wps = 54014, train loss = 4.3646
Iteration 7479, time = 0.94s, wps = 54416, train loss = 4.2921
Iteration 7499, time = 0.93s, wps = 55291, train loss = 4.3728
Iteration 7519, time = 0.95s, wps = 53622, train loss = 4.2764
Iteration 7539, time = 0.94s, wps = 54212, train loss = 4.2969
Iteration 7559, time = 0.94s, wps = 54696, train loss = 4.4277
Iteration 7579, time = 0.95s, wps = 53958, train loss = 4.4030
Iteration 7599, time = 0.94s, wps = 54702, train loss = 4.4579
Iteration 7619, time = 0.95s, wps = 54102, train loss = 4.4251
Iteration 7639, time = 0.95s, wps = 53926, train loss = 4.2976
Iteration 7659, time = 0.95s, wps = 54143, train loss = 4.4781
Iteration 7679, time = 0.95s, wps = 53959, train loss = 4.3089
Iteration 7699, time = 0.94s, wps = 54219, train loss = 4.2809
Iteration 7719, time = 0.96s, wps = 53516, train loss = 4.4331
Iteration 7739, time = 0.93s, wps = 54947, train loss = 4.3741
Iteration 7759, time = 0.95s, wps = 54017, train loss = 4.1921
Iteration 7779, time = 0.94s, wps = 54250, train loss = 4.2769
Iteration 7799, time = 0.95s, wps = 53886, train loss = 4.4263
Iteration 7819, time = 0.96s, wps = 53607, train loss = 4.3591
Iteration 7839, time = 0.94s, wps = 54509, train loss = 4.2986
Iteration 7859, time = 0.96s, wps = 53224, train loss = 4.3973
Iteration 7879, time = 0.95s, wps = 54153, train loss = 4.2769
Iteration 7899, time = 0.95s, wps = 53956, train loss = 4.3172
Iteration 7919, time = 0.96s, wps = 53262, train loss = 4.3351
Iteration 7939, time = 0.95s, wps = 53782, train loss = 4.3285
Iteration 7959, time = 0.95s, wps = 54140, train loss = 4.2333
Iteration 7979, time = 0.95s, wps = 53796, train loss = 4.3648
Iteration 7999, time = 0.95s, wps = 53858, train loss = 4.3817
Iteration 8019, time = 0.93s, wps = 55175, train loss = 4.3891
Iteration 8039, time = 0.95s, wps = 53886, train loss = 4.3375
Iteration 8059, time = 0.94s, wps = 54291, train loss = 4.2899
Iteration 8079, time = 0.95s, wps = 53779, train loss = 4.2865
Iteration 8099, time = 0.96s, wps = 53521, train loss = 4.2296
Iteration 8119, time = 0.94s, wps = 54347, train loss = 4.4438
Iteration 8139, time = 0.95s, wps = 54174, train loss = 4.2700
Iteration 8159, time = 0.94s, wps = 54320, train loss = 4.3080
Iteration 8179, time = 0.95s, wps = 53759, train loss = 4.2659
Iteration 8199, time = 0.95s, wps = 53931, train loss = 4.4815
Iteration 8219, time = 0.96s, wps = 53338, train loss = 4.1332
Iteration 8239, time = 0.95s, wps = 53697, train loss = 4.3611
Iteration 8259, time = 0.95s, wps = 53969, train loss = 4.3074
Iteration 8279, time = 0.95s, wps = 53686, train loss = 4.2448
Iteration 8299, time = 0.96s, wps = 53328, train loss = 4.3609
Iteration 8319, time = 0.96s, wps = 53496, train loss = 4.3597
Iteration 8339, time = 0.96s, wps = 53561, train loss = 4.3407
Iteration 8359, time = 0.95s, wps = 53914, train loss = 4.3281
Iteration 8379, time = 0.96s, wps = 53520, train loss = 4.3582
Iteration 8399, time = 0.96s, wps = 53201, train loss = 4.3149
Iteration 8419, time = 0.94s, wps = 54190, train loss = 4.2764
Iteration 8439, time = 0.95s, wps = 53660, train loss = 4.3059
Iteration 8459, time = 0.94s, wps = 54426, train loss = 4.2583
Iteration 8479, time = 0.95s, wps = 53896, train loss = 4.3462
Iteration 8499, time = 0.95s, wps = 53678, train loss = 4.2055
Iteration 8519, time = 0.95s, wps = 53968, train loss = 4.2114
Iteration 8539, time = 0.95s, wps = 53668, train loss = 4.3191
Iteration 8559, time = 0.94s, wps = 54211, train loss = 4.3260
Iteration 8579, time = 0.94s, wps = 54189, train loss = 4.2125
Iteration 8599, time = 0.96s, wps = 53059, train loss = 4.2924
Iteration 8619, time = 0.95s, wps = 53785, train loss = 4.2746
Iteration 8639, time = 0.96s, wps = 53424, train loss = 4.3637
Iteration 8659, time = 0.95s, wps = 53692, train loss = 4.3316
Iteration 8679, time = 0.96s, wps = 53291, train loss = 4.2155
Iteration 8699, time = 0.98s, wps = 52498, train loss = 4.2262
Iteration 8719, time = 0.95s, wps = 54013, train loss = 4.4113
Iteration 8739, time = 0.97s, wps = 52941, train loss = 4.2881
Iteration 8759, time = 0.97s, wps = 52683, train loss = 4.2936
Iteration 8779, time = 0.96s, wps = 53474, train loss = 4.3620
Iteration 8799, time = 0.96s, wps = 53344, train loss = 4.3213
Iteration 8819, time = 0.96s, wps = 53377, train loss = 4.1749
Iteration 8839, time = 0.97s, wps = 52793, train loss = 4.2924
Iteration 8859, time = 0.96s, wps = 53509, train loss = 4.2420
Iteration 8879, time = 0.98s, wps = 52052, train loss = 4.1950
Iteration 8899, time = 0.97s, wps = 52923, train loss = 4.2464
Iteration 8919, time = 0.98s, wps = 52263, train loss = 4.2375
Iteration 8939, time = 0.97s, wps = 52950, train loss = 4.2496
Iteration 8959, time = 0.97s, wps = 53029, train loss = 4.1827
Iteration 8979, time = 0.98s, wps = 52305, train loss = 4.2421
Iteration 8999, time = 0.96s, wps = 53064, train loss = 4.3345
Iteration 9019, time = 0.96s, wps = 53093, train loss = 4.2799
Iteration 9039, time = 0.97s, wps = 52761, train loss = 4.3130
Iteration 9059, time = 0.97s, wps = 53041, train loss = 4.3662
Iteration 9079, time = 0.98s, wps = 52138, train loss = 4.3200
Iteration 9099, time = 0.98s, wps = 52331, train loss = 4.2666
Iteration 9119, time = 0.98s, wps = 52224, train loss = 4.2065
Iteration 9139, time = 0.97s, wps = 52583, train loss = 4.3688
Iteration 9159, time = 0.98s, wps = 52212, train loss = 4.3643
Iteration 9179, time = 0.99s, wps = 51576, train loss = 4.3739
Iteration 9199, time = 0.97s, wps = 52579, train loss = 4.2825
Iteration 9219, time = 0.98s, wps = 52268, train loss = 4.2033
Iteration 9239, time = 0.99s, wps = 51822, train loss = 4.3204
Iteration 9259, time = 0.98s, wps = 52438, train loss = 4.2135
Iteration 9279, time = 0.99s, wps = 51867, train loss = 4.2187
Iteration 9299, time = 1.00s, wps = 51352, train loss = 4.3329
Iteration 9319, time = 0.98s, wps = 52123, train loss = 4.4310
Iteration 9339, time = 0.99s, wps = 51837, train loss = 4.2702
Iteration 9359, time = 1.01s, wps = 50672, train loss = 4.2161
Iteration 9379, time = 1.00s, wps = 51438, train loss = 4.3443
Iteration 9399, time = 0.99s, wps = 51495, train loss = 4.3453
Iteration 9419, time = 0.99s, wps = 51563, train loss = 4.2750
Iteration 9439, time = 1.00s, wps = 51048, train loss = 4.2528
Iteration 9459, time = 0.99s, wps = 51805, train loss = 4.2696
Iteration 9479, time = 1.01s, wps = 50662, train loss = 4.2684
Iteration 9499, time = 1.00s, wps = 51357, train loss = 4.3209
Iteration 9519, time = 1.02s, wps = 49973, train loss = 4.3154
Iteration 9539, time = 1.01s, wps = 50695, train loss = 4.2896
Iteration 9559, time = 1.02s, wps = 50323, train loss = 4.2746
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00063-of-00100
Finished processing!
Iteration 9579, time = 2.59s, wps = 19773, train loss = 4.3275
Iteration 9599, time = 0.99s, wps = 51875, train loss = 4.4375
Iteration 9619, time = 1.00s, wps = 51143, train loss = 4.1949
Iteration 9639, time = 1.01s, wps = 50681, train loss = 4.3837
Iteration 9659, time = 0.98s, wps = 52247, train loss = 4.1845
Iteration 9679, time = 1.02s, wps = 50195, train loss = 4.2592
Iteration 9699, time = 1.00s, wps = 51328, train loss = 4.2662
Iteration 9719, time = 1.01s, wps = 50568, train loss = 4.2138
Iteration 9739, time = 1.00s, wps = 51330, train loss = 4.2908
Iteration 9759, time = 0.99s, wps = 51726, train loss = 4.3254
Iteration 9779, time = 1.02s, wps = 50085, train loss = 4.3411
Iteration 9799, time = 1.02s, wps = 50168, train loss = 4.3914
Iteration 9819, time = 1.03s, wps = 49624, train loss = 4.2592
Iteration 9839, time = 1.02s, wps = 50342, train loss = 4.2411
Iteration 9859, time = 1.02s, wps = 50046, train loss = 4.1452
Iteration 9879, time = 1.04s, wps = 49406, train loss = 4.2177
Iteration 9899, time = 1.02s, wps = 50299, train loss = 4.2428
Iteration 9919, time = 1.02s, wps = 50016, train loss = 4.2902
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m8.745s
user    9m34.200s
sys     3m0.364s
root@9250dfb71258:/workspace/nvidia-examples/big_lstm# cat /etc/os-release
NAME="Ubuntu"
VERSION="16.04.6 LTS (Xenial Xerus)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 16.04.6 LTS"
VERSION_ID="16.04"
HOME_URL="http://www.ubuntu.com/"
SUPPORT_URL="http://help.ubuntu.com/"
BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/"
VERSION_CODENAME=xenial
UBUNTU_CODENAME=xenial
root@9250dfb71258:/workspace/nvidia-examples/big_lstm# nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Fri_Feb__8_19:08:17_PST_2019
Cuda compilation tools, release 10.1, V10.1.105
root@9250dfb71258:/workspace/nvidia-examples/big_lstm# cd data
root@9250dfb71258:/workspace/nvidia-examples/big_lstm/data# ls
1-billion-word-language-modeling-benchmark-r13output
root@9250dfb71258:/workspace/nvidia-examples/big_lstm/data# cd 1-billion-word-language-modeling-benchmark-r13output
root@9250dfb71258:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# ls
1b_word_vocab.txt  heldout-monolingual.tokenized.shuffled
README             training-monolingual.tokenized.shuffled
root@9250dfb71258:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# cd training-monolingual.tokenized.shuffled
root@9250dfb71258:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# ls
news.en-00001-of-00100  news.en-00034-of-00100  news.en-00067-of-00100
news.en-00002-of-00100  news.en-00035-of-00100  news.en-00068-of-00100
news.en-00003-of-00100  news.en-00036-of-00100  news.en-00069-of-00100
news.en-00004-of-00100  news.en-00037-of-00100  news.en-00070-of-00100
news.en-00005-of-00100  news.en-00038-of-00100  news.en-00071-of-00100
news.en-00006-of-00100  news.en-00039-of-00100  news.en-00072-of-00100
news.en-00007-of-00100  news.en-00040-of-00100  news.en-00073-of-00100
news.en-00008-of-00100  news.en-00041-of-00100  news.en-00074-of-00100
news.en-00009-of-00100  news.en-00042-of-00100  news.en-00075-of-00100
news.en-00010-of-00100  news.en-00043-of-00100  news.en-00076-of-00100
news.en-00011-of-00100  news.en-00044-of-00100  news.en-00077-of-00100
news.en-00012-of-00100  news.en-00045-of-00100  news.en-00078-of-00100
news.en-00013-of-00100  news.en-00046-of-00100  news.en-00079-of-00100
news.en-00014-of-00100  news.en-00047-of-00100  news.en-00080-of-00100
news.en-00015-of-00100  news.en-00048-of-00100  news.en-00081-of-00100
news.en-00016-of-00100  news.en-00049-of-00100  news.en-00082-of-00100
news.en-00017-of-00100  news.en-00050-of-00100  news.en-00083-of-00100
news.en-00018-of-00100  news.en-00051-of-00100  news.en-00084-of-00100
news.en-00019-of-00100  news.en-00052-of-00100  news.en-00085-of-00100
news.en-00020-of-00100  news.en-00053-of-00100  news.en-00086-of-00100
news.en-00021-of-00100  news.en-00054-of-00100  news.en-00087-of-00100
news.en-00022-of-00100  news.en-00055-of-00100  news.en-00088-of-00100
news.en-00023-of-00100  news.en-00056-of-00100  news.en-00089-of-00100
news.en-00024-of-00100  news.en-00057-of-00100  news.en-00090-of-00100
news.en-00025-of-00100  news.en-00058-of-00100  news.en-00091-of-00100
news.en-00026-of-00100  news.en-00059-of-00100  news.en-00092-of-00100
news.en-00027-of-00100  news.en-00060-of-00100  news.en-00093-of-00100
news.en-00028-of-00100  news.en-00061-of-00100  news.en-00094-of-00100
news.en-00029-of-00100  news.en-00062-of-00100  news.en-00095-of-00100
news.en-00030-of-00100  news.en-00063-of-00100  news.en-00096-of-00100
news.en-00031-of-00100  news.en-00064-of-00100  news.en-00097-of-00100
news.en-00032-of-00100  news.en-00065-of-00100  news.en-00098-of-00100
news.en-00033-of-00100  news.en-00066-of-00100  news.en-00099-of-00100
root@9250dfb71258:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# exit
exit
[chibi@rhel8 ~]$ cat /etc/redhat-release
Red Hat Enterprise Linux release 8.2 (Ootpa)
[chibi@rhel8 ~]$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Wed_May__6_19:09:25_PDT_2020
Cuda compilation tools, release 11.0, V11.0.167
Build cuda_11.0_bu.TC445_37.28358933_0
[chibi@rhel8 ~]$ sensors
eth0-pci-4400
Adapter: PCI adapter
PHY Temperature:  +50.2°C

k10temp-pci-00c3
Adapter: PCI adapter
Tdie:         +39.8°C  (high = +70.0°C)
Tctl:         +39.8°C

iwlwifi-virtual-0
Adapter: Virtual device
temp1:        +36.0°C

[chibi@rhel8 ~]$ sudo nvme list
[sudo] chibi のパスワード:
Node             SN                   Model                                    Namespace Usage                      Format           FW Rev
---------------- -------------------- ---------------------------------------- --------- -------------------------- ---------------- --------
/dev/nvme0n1     P02938115263         PLEXTOR PX-512M9PeG                      1         512.11  GB / 512.11  GB    512   B +  0 B   1.07
[chibi@rhel8 ~]$ sudo nvme smart-log /dev/nvme0n1
Smart Log for NVME device:nvme0n1 namespace-id:ffffffff
critical_warning                    : 0
temperature                         : 60 C
available_spare                     : 100%
available_spare_threshold           : 0%
percentage_used                     : 0%
data_units_read                     : 585,185
data_units_written                  : 2,354,523
host_read_commands                  : 12,003,549
host_write_commands                 : 15,600,648
controller_busy_time                : 527
power_cycles                        : 50
power_on_hours                      : 65
unsafe_shutdowns                    : 9
media_errors                        : 0
num_err_log_entries                 : 0
Warning Temperature Time            : 0
Critical Composite Temperature Time : 0
Temperature Sensor 1                : 60 C
Thermal Management T1 Trans Count   : 0
Thermal Management T2 Trans Count   : 0
Thermal Management T1 Total Time    : 0
Thermal Management T2 Total Time    : 0
[chibi@rhel8 ~]$ nvidia-smi nvlink -c
GPU 0: TITAN RTX (UUID: GPU-7fb51c1d-c1e7-35cc-aad7-66971f05ddb7)
GPU 1: TITAN RTX (UUID: GPU-5a71d61e-f130-637a-b33d-4df555b0ed88)
GPU 2: GeForce RTX 2080 Ti (UUID: GPU-1ac935c2-557f-282e-14e5-3f749ffd63ac)
GPU 3: GeForce RTX 2080 Ti (UUID: GPU-13277ce5-e1e9-0cb1-8cee-6c9e6618e774)
[chibi@rhel8 ~]$ lscpu
アーキテクチャ:                      x86_64
CPU 操作モード:                      32-bit, 64-bit
バイト順序:                          Little Endian
CPU:                                 128
オンラインになっている CPU のリスト: 0-127
コアあたりのスレッド数:              2
ソケットあたりのコア数:              64
ソケット数:                          1
NUMA ノード数:                       1
ベンダー ID:                         AuthenticAMD
CPU ファミリー:                      23
モデル:                              49
モデル名:                            AMD Ryzen Threadripper 3990X 64-Core Processor
ステッピング:                        0
CPU MHz:                             3617.137
CPU 最大 MHz:                        2900.0000
CPU 最小 MHz:                        2200.0000
BogoMIPS:                            5800.08
仮想化:                              AMD-V
L1d キャッシュ:                      32K
L1i キャッシュ:                      32K
L2 キャッシュ:                       512K
L3 キャッシュ:                       16384K
NUMA ノード 0 CPU:                   0-127
フラグ:                              fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
[chibi@rhel8 ~]$ lstopo
Machine (63GB)
  Package L#0
    L3 L#0 (16MB)
      L2 L#0 (512KB) + L1d L#0 (32KB) + L1i L#0 (32KB) + Core L#0
        PU L#0 (P#0)
        PU L#1 (P#64)
      L2 L#1 (512KB) + L1d L#1 (32KB) + L1i L#1 (32KB) + Core L#1
        PU L#2 (P#1)
        PU L#3 (P#65)
      L2 L#2 (512KB) + L1d L#2 (32KB) + L1i L#2 (32KB) + Core L#2
        PU L#4 (P#2)
        PU L#5 (P#66)
      L2 L#3 (512KB) + L1d L#3 (32KB) + L1i L#3 (32KB) + Core L#3
        PU L#6 (P#3)
        PU L#7 (P#67)
    L3 L#1 (16MB)
      L2 L#4 (512KB) + L1d L#4 (32KB) + L1i L#4 (32KB) + Core L#4
        PU L#8 (P#4)
        PU L#9 (P#68)
      L2 L#5 (512KB) + L1d L#5 (32KB) + L1i L#5 (32KB) + Core L#5
        PU L#10 (P#5)
        PU L#11 (P#69)
      L2 L#6 (512KB) + L1d L#6 (32KB) + L1i L#6 (32KB) + Core L#6
        PU L#12 (P#6)
        PU L#13 (P#70)
      L2 L#7 (512KB) + L1d L#7 (32KB) + L1i L#7 (32KB) + Core L#7
        PU L#14 (P#7)
        PU L#15 (P#71)
    L3 L#2 (16MB)
      L2 L#8 (512KB) + L1d L#8 (32KB) + L1i L#8 (32KB) + Core L#8
        PU L#16 (P#8)
        PU L#17 (P#72)
      L2 L#9 (512KB) + L1d L#9 (32KB) + L1i L#9 (32KB) + Core L#9
        PU L#18 (P#9)
        PU L#19 (P#73)
      L2 L#10 (512KB) + L1d L#10 (32KB) + L1i L#10 (32KB) + Core L#10
        PU L#20 (P#10)
        PU L#21 (P#74)
      L2 L#11 (512KB) + L1d L#11 (32KB) + L1i L#11 (32KB) + Core L#11
        PU L#22 (P#11)
        PU L#23 (P#75)
    L3 L#3 (16MB)
      L2 L#12 (512KB) + L1d L#12 (32KB) + L1i L#12 (32KB) + Core L#12
        PU L#24 (P#12)
        PU L#25 (P#76)
      L2 L#13 (512KB) + L1d L#13 (32KB) + L1i L#13 (32KB) + Core L#13
        PU L#26 (P#13)
        PU L#27 (P#77)
      L2 L#14 (512KB) + L1d L#14 (32KB) + L1i L#14 (32KB) + Core L#14
        PU L#28 (P#14)
        PU L#29 (P#78)
      L2 L#15 (512KB) + L1d L#15 (32KB) + L1i L#15 (32KB) + Core L#15
        PU L#30 (P#15)
        PU L#31 (P#79)
    L3 L#4 (16MB)
      L2 L#16 (512KB) + L1d L#16 (32KB) + L1i L#16 (32KB) + Core L#16
        PU L#32 (P#16)
        PU L#33 (P#80)
      L2 L#17 (512KB) + L1d L#17 (32KB) + L1i L#17 (32KB) + Core L#17
        PU L#34 (P#17)
        PU L#35 (P#81)
      L2 L#18 (512KB) + L1d L#18 (32KB) + L1i L#18 (32KB) + Core L#18
        PU L#36 (P#18)
        PU L#37 (P#82)
      L2 L#19 (512KB) + L1d L#19 (32KB) + L1i L#19 (32KB) + Core L#19
        PU L#38 (P#19)
        PU L#39 (P#83)
    L3 L#5 (16MB)
      L2 L#20 (512KB) + L1d L#20 (32KB) + L1i L#20 (32KB) + Core L#20
        PU L#40 (P#20)
        PU L#41 (P#84)
      L2 L#21 (512KB) + L1d L#21 (32KB) + L1i L#21 (32KB) + Core L#21
        PU L#42 (P#21)
        PU L#43 (P#85)
      L2 L#22 (512KB) + L1d L#22 (32KB) + L1i L#22 (32KB) + Core L#22
        PU L#44 (P#22)
        PU L#45 (P#86)
      L2 L#23 (512KB) + L1d L#23 (32KB) + L1i L#23 (32KB) + Core L#23
        PU L#46 (P#23)
        PU L#47 (P#87)
    L3 L#6 (16MB)
      L2 L#24 (512KB) + L1d L#24 (32KB) + L1i L#24 (32KB) + Core L#24
        PU L#48 (P#24)
        PU L#49 (P#88)
      L2 L#25 (512KB) + L1d L#25 (32KB) + L1i L#25 (32KB) + Core L#25
        PU L#50 (P#25)
        PU L#51 (P#89)
      L2 L#26 (512KB) + L1d L#26 (32KB) + L1i L#26 (32KB) + Core L#26
        PU L#52 (P#26)
        PU L#53 (P#90)
      L2 L#27 (512KB) + L1d L#27 (32KB) + L1i L#27 (32KB) + Core L#27
        PU L#54 (P#27)
        PU L#55 (P#91)
    L3 L#7 (16MB)
      L2 L#28 (512KB) + L1d L#28 (32KB) + L1i L#28 (32KB) + Core L#28
        PU L#56 (P#28)
        PU L#57 (P#92)
      L2 L#29 (512KB) + L1d L#29 (32KB) + L1i L#29 (32KB) + Core L#29
        PU L#58 (P#29)
        PU L#59 (P#93)
      L2 L#30 (512KB) + L1d L#30 (32KB) + L1i L#30 (32KB) + Core L#30
        PU L#60 (P#30)
        PU L#61 (P#94)
      L2 L#31 (512KB) + L1d L#31 (32KB) + L1i L#31 (32KB) + Core L#31
        PU L#62 (P#31)
        PU L#63 (P#95)
    L3 L#8 (16MB)
      L2 L#32 (512KB) + L1d L#32 (32KB) + L1i L#32 (32KB) + Core L#32
        PU L#64 (P#32)
        PU L#65 (P#96)
      L2 L#33 (512KB) + L1d L#33 (32KB) + L1i L#33 (32KB) + Core L#33
        PU L#66 (P#33)
        PU L#67 (P#97)
      L2 L#34 (512KB) + L1d L#34 (32KB) + L1i L#34 (32KB) + Core L#34
        PU L#68 (P#34)
        PU L#69 (P#98)
      L2 L#35 (512KB) + L1d L#35 (32KB) + L1i L#35 (32KB) + Core L#35
        PU L#70 (P#35)
        PU L#71 (P#99)
    L3 L#9 (16MB)
      L2 L#36 (512KB) + L1d L#36 (32KB) + L1i L#36 (32KB) + Core L#36
        PU L#72 (P#36)
        PU L#73 (P#100)
      L2 L#37 (512KB) + L1d L#37 (32KB) + L1i L#37 (32KB) + Core L#37
        PU L#74 (P#37)
        PU L#75 (P#101)
      L2 L#38 (512KB) + L1d L#38 (32KB) + L1i L#38 (32KB) + Core L#38
        PU L#76 (P#38)
        PU L#77 (P#102)
      L2 L#39 (512KB) + L1d L#39 (32KB) + L1i L#39 (32KB) + Core L#39
        PU L#78 (P#39)
        PU L#79 (P#103)
    L3 L#10 (16MB)
      L2 L#40 (512KB) + L1d L#40 (32KB) + L1i L#40 (32KB) + Core L#40
        PU L#80 (P#40)
        PU L#81 (P#104)
      L2 L#41 (512KB) + L1d L#41 (32KB) + L1i L#41 (32KB) + Core L#41
        PU L#82 (P#41)
        PU L#83 (P#105)
      L2 L#42 (512KB) + L1d L#42 (32KB) + L1i L#42 (32KB) + Core L#42
        PU L#84 (P#42)
        PU L#85 (P#106)
      L2 L#43 (512KB) + L1d L#43 (32KB) + L1i L#43 (32KB) + Core L#43
        PU L#86 (P#43)
        PU L#87 (P#107)
    L3 L#11 (16MB)
      L2 L#44 (512KB) + L1d L#44 (32KB) + L1i L#44 (32KB) + Core L#44
        PU L#88 (P#44)
        PU L#89 (P#108)
      L2 L#45 (512KB) + L1d L#45 (32KB) + L1i L#45 (32KB) + Core L#45
        PU L#90 (P#45)
        PU L#91 (P#109)
      L2 L#46 (512KB) + L1d L#46 (32KB) + L1i L#46 (32KB) + Core L#46
        PU L#92 (P#46)
        PU L#93 (P#110)
      L2 L#47 (512KB) + L1d L#47 (32KB) + L1i L#47 (32KB) + Core L#47
        PU L#94 (P#47)
        PU L#95 (P#111)
    L3 L#12 (16MB)
      L2 L#48 (512KB) + L1d L#48 (32KB) + L1i L#48 (32KB) + Core L#48
        PU L#96 (P#48)
        PU L#97 (P#112)
      L2 L#49 (512KB) + L1d L#49 (32KB) + L1i L#49 (32KB) + Core L#49
        PU L#98 (P#49)
        PU L#99 (P#113)
      L2 L#50 (512KB) + L1d L#50 (32KB) + L1i L#50 (32KB) + Core L#50
        PU L#100 (P#50)
        PU L#101 (P#114)
      L2 L#51 (512KB) + L1d L#51 (32KB) + L1i L#51 (32KB) + Core L#51
        PU L#102 (P#51)
        PU L#103 (P#115)
    L3 L#13 (16MB)
      L2 L#52 (512KB) + L1d L#52 (32KB) + L1i L#52 (32KB) + Core L#52
        PU L#104 (P#52)
        PU L#105 (P#116)
      L2 L#53 (512KB) + L1d L#53 (32KB) + L1i L#53 (32KB) + Core L#53
        PU L#106 (P#53)
        PU L#107 (P#117)
      L2 L#54 (512KB) + L1d L#54 (32KB) + L1i L#54 (32KB) + Core L#54
        PU L#108 (P#54)
        PU L#109 (P#118)
      L2 L#55 (512KB) + L1d L#55 (32KB) + L1i L#55 (32KB) + Core L#55
        PU L#110 (P#55)
        PU L#111 (P#119)
    L3 L#14 (16MB)
      L2 L#56 (512KB) + L1d L#56 (32KB) + L1i L#56 (32KB) + Core L#56
        PU L#112 (P#56)
        PU L#113 (P#120)
      L2 L#57 (512KB) + L1d L#57 (32KB) + L1i L#57 (32KB) + Core L#57
        PU L#114 (P#57)
        PU L#115 (P#121)
      L2 L#58 (512KB) + L1d L#58 (32KB) + L1i L#58 (32KB) + Core L#58
        PU L#116 (P#58)
        PU L#117 (P#122)
      L2 L#59 (512KB) + L1d L#59 (32KB) + L1i L#59 (32KB) + Core L#59
        PU L#118 (P#59)
        PU L#119 (P#123)
    L3 L#15 (16MB)
      L2 L#60 (512KB) + L1d L#60 (32KB) + L1i L#60 (32KB) + Core L#60
        PU L#120 (P#60)
        PU L#121 (P#124)
      L2 L#61 (512KB) + L1d L#61 (32KB) + L1i L#61 (32KB) + Core L#61
        PU L#122 (P#61)
        PU L#123 (P#125)
      L2 L#62 (512KB) + L1d L#62 (32KB) + L1i L#62 (32KB) + Core L#62
        PU L#124 (P#62)
        PU L#125 (P#126)
      L2 L#63 (512KB) + L1d L#63 (32KB) + L1i L#63 (32KB) + Core L#63
        PU L#126 (P#63)
        PU L#127 (P#127)
  HostBridge L#0
    PCIBridge
      PCI 10de:1e02
        GPU L#0 "renderD128"
        GPU L#1 "card0"
  HostBridge L#2
    PCIBridge
      PCI 10de:1e02
        GPU L#2 "card1"
        GPU L#3 "renderD129"
  HostBridge L#4
    PCIBridge
      PCIBridge
        PCIBridge
          PCI 1d6a:07b1
            Net L#4 "eth0"
        PCIBridge
          PCI 8086:2723
            Net L#5 "wlan1"
        PCIBridge
          PCI 10ec:8125
        PCIBridge
          PCI 1022:7901
        PCIBridge
          PCI 1022:7901
    PCIBridge
      PCI 14a4:23f1
    PCIBridge
      PCI 10de:1e07
        GPU L#6 "renderD130"
        GPU L#7 "card2"
    PCIBridge
      PCI 10de:1e07
        GPU L#8 "card3"
        GPU L#9 "renderD131"
[chibi@rhel8 ~]$ sudo nvme smart-log /dev/nvme0n1
Smart Log for NVME device:nvme0n1 namespace-id:ffffffff
critical_warning                    : 0
temperature                         : 60 C
available_spare                     : 100%
available_spare_threshold           : 0%
percentage_used                     : 0%
data_units_read                     : 585,185
data_units_written                  : 2,354,523
host_read_commands                  : 12,003,549
host_write_commands                 : 15,600,662
controller_busy_time                : 527
power_cycles                        : 50
power_on_hours                      : 66
unsafe_shutdowns                    : 9
media_errors                        : 0
num_err_log_entries                 : 0
Warning Temperature Time            : 0
Critical Composite Temperature Time : 0
Temperature Sensor 1                : 60 C
Thermal Management T1 Trans Count   : 0
Thermal Management T2 Trans Count   : 0
Thermal Management T1 Total Time    : 0
Thermal Management T2 Total Time    : 0
[chibi@rhel8 ~]$