[chibi@centos8 ~]$ sudo nvidia-docker run --rm -ti nvcr.io/nvidia/tensorflow:19.04-py3
Unable to find image 'nvcr.io/nvidia/tensorflow:19.04-py3' locally
19.04-py3: Pulling from nvidia/tensorflow
34667c7e4631: Pulling fs layer
d18d76a881a4: Pulling fs layer
119c7358fbfc: Pulling fs layer
2aaf13f3eff0: Waiting
202fa0f8874b: Waiting
3b700a61ede6: Waiting
87e6ca450d3f: Waiting
a1e76dce1aec: Pulling fs layer
a1e76dce1aec: Waiting
b5877a9add73: Pulling fs layer
bab74df105f1: Waiting
534bbf505504: Waiting
9b91fa2f9276: Waiting
f4371944c97d: Pulling fs layer
f4371944c97d: Waiting
5db2639932b5: Pulling fs layer
629d5c9d75a4: Pulling fs layer
8071b94b5429: Pulling fs layer
6eb8eba2ad5a: Pulling fs layer
e32e86c15b8b: Pulling fs layer
4615a735431d: Waiting
f71ce95fb406: Waiting
8071b94b5429: Waiting
e32e86c15b8b: Waiting
34bc85bf8bef: Waiting
4a95ca3431c4: Waiting
41bc2d0a4d4d: Waiting
a2ceadc61854: Waiting
2d0c5308ff92: Waiting
a531832992b8: Waiting
b24a8fd8f2e1: Waiting
8d9313624ab7: Waiting
e5cafe011f22: Pull complete
eca19a329cd4: Pull complete
65ee50af0bcc: Pull complete
5f60ec8c32f4: Pull complete
d7dcb657fa13: Pull complete
1f6ef6575fbe: Pull complete
d1ef346a3015: Pull complete
4ef9cb404fd5: Pull complete
f6797f45a018: Pull complete
1d4380527325: Pull complete
965f2629db02: Pull complete
5debff4c8c0a: Pull complete
b3a3a9d82be6: Pull complete
eac05f20b729: Pull complete
3ce0a7f80167: Pull complete
2a21e34a5784: Pull complete
c1ccf19e258e: Pull complete
0b6ea9d0652b: Pull complete
307bc8c3f024: Pull complete
ca75fd593a79: Pull complete
0cd3cdca1af7: Pull complete
48e857e9d372: Pull complete
3264ea403ca9: Pull complete
Digest: sha256:aaebc136d5d50937362675c77afd908bd96cded68846f39163050a023c8a9851
Status: Downloaded newer image for nvcr.io/nvidia/tensorflow:19.04-py3

================
== TensorFlow ==
================

NVIDIA Release 19.04 (build 6132408)
TensorFlow Version 1.13.1

Container image Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
Copyright 2017-2019 The TensorFlow Authors. All rights reserved.

Various files include modifications (c) NVIDIA CORPORATION. All rights reserved.
NVIDIA modifications are covered by the license terms that apply to the underlying project or file.

NOTE: MOFED driver for multi-node communication was not detected.
      Multi-node communication performance may be reduced.

NOTE: The SHMEM allocation limit is set to the default of 64MB. This may be
      insufficient for TensorFlow. NVIDIA recommends the use of the following flags:
      nvidia-docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 ...

root@e0a73ff92515:/workspace# ls
README.md  docker-examples  nvidia-examples
root@e0a73ff92515:/workspace# cd nvidia-examples/big_lstm
root@e0a73ff92515:/workspace/nvidia-examples/big_lstm# ls
1b_word_vocab.txt  data_utils_test.py  language_model_test.py  README.md
download_1b_words_data.sh  model_utils.py  __init__.py  hparams.py
run_utils.py  common.py  hparams_test.py  single_lm_train.py
data_utils.py  language_model.py  testdata
root@e0a73ff92515:/workspace/nvidia-examples/big_lstm# ./download_1b_words_data.sh
Please specify root of dataset directory: data
Success: dataset root dir validated
--2020-06-03 02:05:31--  http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
Resolving www.statmt.org (www.statmt.org)... 129.215.197.184
Connecting to www.statmt.org (www.statmt.org)|129.215.197.184|:80... connected.
HTTP request sent, awaiting response...
200 OK
Length: 1792209805 (1.7G) [application/x-gzip]
Saving to: ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’

1-billion-word-lang 100%[===================>]   1.67G  54.6KB/s    in 3h 22m

2020-06-03 05:28:07 (144 KB/s) - ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’ saved [1792209805/1792209805]

1-billion-word-language-modeling-benchmark-r13output/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00024-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00057-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00055-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00096-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00081-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00033-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00072-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00082-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00018-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00008-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00059-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00005-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00091-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00062-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00031-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00095-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00076-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00006-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00038-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00015-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00087-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00021-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00049-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00009-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00027-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00056-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00046-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00032-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00029-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00088-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00085-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00011-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00012-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00067-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00003-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00093-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00050-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00053-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00044-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00019-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00066-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00028-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00045-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00039-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00071-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00052-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00078-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00037-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00002-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00014-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00048-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00017-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00004-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00077-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00080-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00020-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00051-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00016-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00079-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00043-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00068-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00099-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00064-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00034-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00054-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00040-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00070-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00063-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00041-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00083-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00061-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00073-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00094-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00030-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00060-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00035-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00023-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00042-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00025-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00090-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00089-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00065-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00075-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00022-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00026-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00098-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00084-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00010-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00069-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00013-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00092-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00036-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00097-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00007-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00074-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00001-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00047-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00086-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00058-of-00100
1-billion-word-language-modeling-benchmark-r13output/.svn/
1-billion-word-language-modeling-benchmark-r13output/.svn/tmp/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/de102cd0c91cd19e6612f0840e68a2f20ba8134c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/deed1b75d3bd5cc36ae6aeb85d56680b892b7948.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/86c58db52fbf362c5bc329afc33b8805085fcb0d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/9f2882e21f860a83ad6ea8898ebab140974ed301.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/bcdbc523ee7488dc438cab869b6d5e236578dbfa.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/d2718bc26d0ee0a213d7d4add99a304cb5b39ede.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/c5b24f61479da923123d0394a188da922ea0359c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/116d6ea61730d8199127596b072e981338597779.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/b0e26559cfe641245584a9400b35ba28d64f1411.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/d3ae508e3bcb0e696dd70aecd052410f1f7afc1d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/9e148bd766e8805e0eb97eeae250433ec7a2e996.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/31b645a482e0b81fda3c567cada307c6fcf7ec80.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/da39a3ee5e6b4b0d3255bfef95601890afd80709.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/c1ed42c415ec884e591fb5c70d373da640a383b5.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/e37ba0f85e94073ccaced1eed7e4f5d737a25f49.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/entries
1-billion-word-language-modeling-benchmark-r13output/.svn/format
1-billion-word-language-modeling-benchmark-r13output/.svn/wc.db
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00015-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00031-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00027-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00010-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00033-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00042-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00046-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00037-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00029-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00013-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00002-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00048-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00006-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00030-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00025-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00039-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00008-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00020-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00001-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00034-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00044-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00045-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00016-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00004-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00035-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00038-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00009-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00024-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00022-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00021-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00032-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00011-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00049-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00041-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00019-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00023-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00040-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00014-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00007-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00017-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00012-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00018-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00003-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00028-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en-00000-of-00100
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00043-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00005-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00036-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00026-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00047-of-00050
1-billion-word-language-modeling-benchmark-r13output/README
Success! One billion words dataset ready at:
data/1-billion-word-language-modeling-benchmark-r13output/
Please pass this dir to single_lm_train.py via the --datadir option.
root@e0a73ff92515:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=4 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output
WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
*****HYPER PARAMETERS*****
{'projected_size': 512, 'keep_prob': 0.9, 'max_grad_norm': 10.0, 'emb_size': 512, 'max_time': 180, 'optimizer': 0, 'num_shards': 8, 'do_summaries': False, 'average_params': True, 'batch_size': 128, 'run_profiler': False, 'vocab_size': 793470, 'state_size': 2048, 'num_steps': 20, 'num_delayed_steps': 150, 'learning_rate': 0.2, 'num_gpus': 4, 'num_sampled': 8192, 'num_layers': 1}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
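As an aside, the wps (words per second) column in the training log that follows can be sanity-checked from the hyperparameters just printed: each iteration consumes batch_size * num_steps words on each of the num_gpus workers, and at steady state the script logs every 20 iterations. A minimal sketch of that arithmetic in Python (the variable names here are mine, for illustration, not identifiers from single_lm_train.py):

    # Back-of-the-envelope check of the "wps" column in the log below.
    batch_size = 128      # per-GPU batch size, from the hyperparameter dump
    num_steps = 20        # LSTM unroll length: words consumed per sample
    num_gpus = 4
    log_interval = 20     # steady-state logging period, in iterations
    interval_time = 1.90  # seconds, e.g. "Iteration 100, time = 1.90s"

    words = batch_size * num_steps * num_gpus * log_interval
    print(round(words / interval_time))  # ~107800, matching the logged 107000-108000

The same formula explains the first log line: a single 10.19 s iteration gives 128 * 20 * 4 / 10.19, about 1005 wps.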
Current time: 1591162193.5661428
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
model/model_2/state_2_0:0 (128, 2560) /gpu:2
model/model_3/state_3_0:0 (128, 2560) /gpu:3
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-06-03 05:29:54.238335: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2899895000 Hz
2020-06-03 05:29:54.245791: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xbd03910 executing computations on platform Host. Devices:
2020-06-03 05:29:54.245836: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): <undefined>, <undefined>
2020-06-03 05:29:54.709728: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-03 05:29:54.714887: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-03 05:29:54.721530: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-03 05:29:54.722471: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xbd03330 executing computations on platform CUDA. Devices:
2020-06-03 05:29:54.722516: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-06-03 05:29:54.722523: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-06-03 05:29:54.722530: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-03 05:29:54.722536: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-03 05:29:54.723738: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:01:00.0
totalMemory: 23.65GiB freeMemory: 23.22GiB
2020-06-03 05:29:54.723768: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.48GiB
2020-06-03 05:29:54.723790: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4a:00.0
totalMemory: 10.76GiB freeMemory: 10.60GiB
2020-06-03 05:29:54.723812: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4b:00.0
totalMemory: 10.76GiB freeMemory: 10.60GiB
2020-06-03 05:29:54.723953: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-06-03 05:29:55.507957: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-06-03 05:29:55.508004: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1 2 3
2020-06-03 05:29:55.508009: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N N N N
2020-06-03 05:29:55.508013: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   N N N N
2020-06-03 05:29:55.508016: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2:   N N N N
2020-06-03 05:29:55.508021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3:   N N N N
2020-06-03 05:29:55.508143: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22500 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-06-03 05:29:55.508503: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22757 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-06-03 05:29:55.508783: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10224 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4a:00.0, compute capability: 7.5)
2020-06-03 05:29:55.509047: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10224 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5)
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00041-of-00100
Finished processing!
2020-06-03 05:30:15.212606: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1, time = 10.19s, wps = 1005, train loss = 13.0140
Iteration 2, time = 7.99s, wps = 1282, train loss = 12.9435
Iteration 3, time = 0.10s, wps = 98452, train loss = 12.7946
Iteration 4, time = 0.10s, wps = 103417, train loss = 16.1068
Iteration 5, time = 0.10s, wps = 104724, train loss = 12.4338
Iteration 6, time = 0.09s, wps = 109682, train loss = 16.9657
Iteration 7, time = 0.10s, wps = 103115, train loss = 13.5717
Iteration 8, time = 0.09s, wps = 108915, train loss = 12.0165
Iteration 9, time = 0.10s, wps = 107361, train loss = 38.5849
Iteration 20, time = 1.05s, wps = 107301, train loss = 10.9848
Iteration 40, time = 1.91s, wps = 107325, train loss = 11.4315
Iteration 60, time = 1.89s, wps = 108229, train loss = 8.9533
Iteration 80, time = 1.89s, wps = 108291, train loss = 8.6800
Iteration 100, time = 1.90s, wps = 107558, train loss = 7.9227
Iteration 120, time = 1.92s, wps = 106608, train loss = 7.6694
Iteration 140, time = 1.91s, wps = 107123, train loss = 7.2194
Iteration 160, time = 1.90s, wps = 107945, train loss = 7.0019
Iteration 180, time = 1.89s, wps = 108143, train loss = 6.7265
Iteration 200, time = 1.91s, wps = 107431, train loss = 6.4149
Iteration 220, time = 1.89s, wps = 108315, train loss = 6.3532
Iteration 240, time = 1.90s, wps = 108054, train loss = 6.3307
Iteration 260, time = 1.88s, wps = 108701, train loss = 6.2501
Iteration 280, time = 1.90s, wps = 107878, train loss = 6.1770
Iteration 300, time = 1.91s, wps = 107040, train loss = 6.0954
Iteration 320, time = 1.90s, wps = 107837, train loss = 6.0784
Iteration 340, time = 1.90s, wps = 107782, train loss = 5.9802
Iteration 360, time = 1.92s, wps = 106558, train loss = 5.9102
Iteration 380, time = 1.89s, wps = 108523, train loss = 5.9022
Iteration 400, time = 1.91s, wps = 107003, train loss = 5.9361
Iteration 420, time = 1.91s, wps = 107327, train loss = 6.0096
Iteration 440, time = 1.90s, wps = 107513, train loss = 5.8518
Iteration 460, time = 1.89s, wps = 108483, train loss = 5.7802
Iteration 480, time = 1.89s, wps = 108487, train loss = 5.7183
Iteration 500, time = 1.91s, wps = 107078, train loss = 5.7344
Iteration 520, time = 1.91s, wps = 107430, train loss = 5.6965
Iteration 540, time = 1.91s, wps = 107381, train loss = 5.7180
Iteration 560, time = 1.90s, wps = 107910, train loss = 5.6378
Iteration 580, time = 1.92s, wps = 106721, train loss = 5.6342
Iteration 600, time = 1.91s, wps = 107281, train loss = 5.6478
Iteration 620, time = 1.91s, wps = 107495, train loss = 5.5701
Iteration 640, time = 1.90s, wps = 107624, train loss = 5.5614
Iteration 660, time = 1.91s, wps = 107016, train loss = 5.5821
Iteration 680, time = 1.92s, wps = 106693, train loss = 5.5554
Iteration 700, time = 1.92s, wps = 106928, train loss = 5.5565
Iteration 720, time = 1.90s, wps = 107905, train loss = 5.5507
Iteration 740, time = 1.92s, wps = 106620, train loss = 5.4535
Iteration 760, time = 1.90s, wps = 107803, train loss = 5.5493
Iteration 780, time = 1.90s, wps = 107518, train loss = 5.5032
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00078-of-00100
Finished processing!
Iteration 800, time = 3.53s, wps = 57974, train loss = 5.4511
Iteration 820, time = 1.89s, wps = 108439, train loss = 5.4159
Iteration 840, time = 1.89s, wps = 108386, train loss = 5.3692
Iteration 860, time = 1.91s, wps = 107287, train loss = 5.4108
Iteration 880, time = 1.90s, wps = 107550, train loss = 5.3716
Iteration 900, time = 1.89s, wps = 108143, train loss = 5.4022
Iteration 920, time = 1.91s, wps = 107431, train loss = 5.3747
Iteration 940, time = 1.91s, wps = 107342, train loss = 5.3137
Iteration 960, time = 1.91s, wps = 107099, train loss = 5.3360
Iteration 980, time = 1.91s, wps = 107228, train loss = 5.2727
Iteration 1000, time = 1.90s, wps = 107705, train loss = 5.2829
Iteration 1020, time = 1.92s, wps = 106930, train loss = 5.2912
Iteration 1040, time = 1.90s, wps = 107780, train loss = 5.2388
Iteration 1060, time = 1.91s, wps = 107067, train loss = 5.2305
Iteration 1080, time = 1.91s, wps = 107066, train loss = 5.2609
Iteration 1100, time = 1.90s, wps = 107675, train loss = 5.1715
Iteration 1120, time = 1.91s, wps = 107503, train loss = 5.2019
Iteration 1140, time = 1.92s, wps = 106818, train loss = 5.2121
Iteration 1160, time = 1.90s, wps = 107676, train loss = 5.1573
Iteration 1180, time = 1.91s, wps = 107506, train loss = 5.1558
Iteration 1200, time = 1.90s, wps = 107637, train loss = 5.1254
Iteration 1220, time = 1.90s, wps = 107996, train loss = 5.0076
Iteration 1240, time = 1.91s, wps = 107412, train loss = 5.0779
Iteration 1260, time = 1.92s, wps = 106820, train loss = 5.1436
Iteration 1280, time = 1.89s, wps = 108126, train loss = 5.0753
Iteration 1300, time = 1.91s, wps = 106949, train loss = 5.0946
Iteration 1320, time = 1.90s, wps = 107688, train loss = 5.0468
Iteration 1340, time = 1.92s, wps = 106698, train loss = 5.1272
Iteration 1360, time = 1.92s, wps = 106848, train loss = 5.0734
Iteration 1380, time = 1.93s, wps = 106193, train loss = 5.0388
Iteration 1400, time = 1.90s, wps = 107690, train loss = 5.0447
Iteration 1420, time = 1.91s, wps = 107166, train loss = 5.0080
Iteration 1440, time = 1.92s, wps = 106638, train loss = 4.9890
Iteration 1460, time = 1.91s, wps = 107331, train loss = 4.9866
Iteration 1480, time = 1.90s, wps = 107876, train loss = 4.9858
Iteration 1500, time = 1.93s, wps = 106166, train loss = 5.0260
Iteration 1520, time = 1.90s, wps = 107658, train loss = 5.0276
Iteration 1540, time = 1.92s, wps = 106694, train loss = 4.9221
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m13.736s
user    23m38.557s
sys     4m52.261s
root@e0a73ff92515:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=3 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output
WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
*****HYPER PARAMETERS*****
{'optimizer': 0, 'state_size': 2048, 'num_gpus': 3, 'learning_rate': 0.2, 'num_shards': 8, 'max_time': 180, 'num_delayed_steps': 150, 'num_sampled': 8192, 'max_grad_norm': 10.0, 'projected_size': 512, 'average_params': True, 'num_layers': 1, 'run_profiler': False, 'emb_size': 512, 'num_steps': 20, 'batch_size': 128, 'do_summaries': False, 'vocab_size': 793470, 'keep_prob': 0.9}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1591164901.532289
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
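Note that the `time` summary of the first run reports real 3m13.736s even though more than 1,500 iterations were processed: training in this example appears to be capped by the max_time of 180 seconds shown in the hyperparameters, so wall time stays roughly constant across runs and wps is the figure to compare. A rough breakdown under that assumption:

    # Rough wall-clock breakdown of the 4-GPU run. Assumption: max_time
    # caps the training loop, which matches the observed behaviour.
    real_time = 3 * 60 + 13.736  # "real 3m13.736s" reported by `time`
    max_time = 180.0             # training budget from the hyperparameters
    print(real_time - max_time)  # ~13.7 s of graph build, data prep, teardown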
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
model/model_2/state_2_0:0 (128, 2560) /gpu:2
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-06-03 06:15:02.070343: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2899895000 Hz
2020-06-03 06:15:02.078014: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xa321fa0 executing computations on platform Host. Devices:
2020-06-03 06:15:02.078055: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): <undefined>, <undefined>
2020-06-03 06:15:02.545326: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-03 06:15:02.564719: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-03 06:15:02.571827: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-03 06:15:02.572733: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xa3219c0 executing computations on platform CUDA. Devices:
2020-06-03 06:15:02.572777: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-06-03 06:15:02.572784: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-06-03 06:15:02.572792: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-03 06:15:02.572800: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-03 06:15:02.573973: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:01:00.0
totalMemory: 23.65GiB freeMemory: 23.22GiB
2020-06-03 06:15:02.574003: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.48GiB
2020-06-03 06:15:02.574026: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4a:00.0
totalMemory: 10.76GiB freeMemory: 10.60GiB
2020-06-03 06:15:02.574049: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4b:00.0
totalMemory: 10.76GiB freeMemory: 10.60GiB
2020-06-03 06:15:02.574204: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-06-03 06:15:03.350868: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-06-03 06:15:03.350913: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1 2 3
2020-06-03 06:15:03.350918: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N N N N
2020-06-03 06:15:03.350926: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   N N N N
2020-06-03 06:15:03.350931: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2:   N N N N
2020-06-03 06:15:03.350935: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3:   N N N N
2020-06-03 06:15:03.351063: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22500 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-06-03 06:15:03.351372: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22757 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-06-03 06:15:03.351654: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10224 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4a:00.0, compute capability: 7.5)
2020-06-03 06:15:03.351827: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10224 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00004-of-00100
Finished processing!
2020-06-03 06:15:16.064586: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1554, time = 7.99s, wps = 962, train loss = 5.4751
Iteration 1555, time = 5.91s, wps = 1300, train loss = 5.0395
Iteration 1556, time = 0.08s, wps = 90849, train loss = 4.9815
Iteration 1557, time = 0.08s, wps = 93439, train loss = 4.9903
Iteration 1558, time = 0.08s, wps = 96566, train loss = 5.0411
Iteration 1559, time = 0.08s, wps = 98946, train loss = 5.0170
Iteration 1560, time = 0.08s, wps = 97634, train loss = 4.9389
Iteration 1561, time = 0.08s, wps = 98762, train loss = 4.9256
Iteration 1562, time = 0.08s, wps = 101038, train loss = 4.9368
Iteration 1573, time = 0.83s, wps = 101655, train loss = 4.9761
Iteration 1593, time = 1.52s, wps = 101181, train loss = 4.9905
Iteration 1613, time = 1.52s, wps = 100897, train loss = 5.0116
Iteration 1633, time = 1.51s, wps = 101482, train loss = 4.9180
Iteration 1653, time = 1.52s, wps = 101167, train loss = 4.9412
Iteration 1673, time = 1.53s, wps = 100122, train loss = 4.9771
Iteration 1693, time = 1.51s, wps = 101415, train loss = 4.8261
Iteration 1713, time = 1.52s, wps = 100900, train loss = 4.9463
Iteration 1733, time = 1.53s, wps = 100565, train loss = 4.9163
Iteration 1753, time = 1.52s, wps = 100723, train loss = 4.9124
Iteration 1773, time = 1.53s, wps = 100416, train loss = 4.9091
Iteration 1793, time = 1.52s, wps = 100976, train loss = 4.8208
Iteration 1813, time = 1.51s, wps = 101840, train loss = 4.8539
Iteration 1833, time = 1.51s, wps = 101494, train loss = 4.8371
Iteration 1853, time = 1.52s, wps = 101162, train loss = 4.9122
Iteration 1873, time = 1.52s, wps = 101274, train loss = 4.8802
Iteration 1893, time = 1.54s, wps = 99704, train loss = 4.8793
Iteration 1913, time = 1.52s, wps = 101333, train loss = 4.8666
Iteration 1933, time = 1.52s, wps = 101377, train loss = 4.8560
Iteration 1953, time = 1.52s, wps = 101090, train loss = 4.8319
Iteration 1973, time = 1.52s, wps = 101020, train loss = 4.8896
Iteration 1993, time = 1.52s, wps = 100860, train loss = 4.9035
Iteration 2013, time = 1.51s, wps = 101430, train loss = 4.8062
Iteration 2033, time = 1.52s, wps = 101041, train loss = 4.8057
Iteration 2053, time = 1.52s, wps = 101198, train loss = 4.8211
Iteration 2073, time = 1.53s, wps = 100445, train loss = 4.8226
Iteration 2093, time = 1.52s, wps = 101018, train loss = 4.8709
Iteration 2113, time = 1.53s, wps = 100431, train loss = 4.8745
Iteration 2133, time = 1.52s, wps = 101126, train loss = 4.8410
Iteration 2153, time = 1.53s, wps = 100677, train loss = 4.7899
Iteration 2173, time = 1.53s, wps = 100071, train loss = 4.7967
Iteration 2193, time = 1.53s, wps = 100496, train loss = 4.7908
Iteration 2213, time = 1.53s, wps = 100362, train loss = 4.7916
Iteration 2233, time = 1.54s, wps = 99876, train loss = 4.7073
Iteration 2253, time = 1.53s, wps = 100559, train loss = 4.7903
Iteration 2273, time = 1.53s, wps = 100091, train loss = 4.7505
Iteration 2293, time = 1.52s, wps = 100941, train loss = 4.7728
Iteration 2313, time = 1.52s, wps = 100941, train loss = 4.7930
Iteration 2333, time = 1.53s, wps = 100619, train loss = 4.7257
Iteration 2353, time = 1.52s, wps = 100846, train loss = 4.7387
Iteration 2373, time = 1.53s, wps = 100678, train loss = 4.7177
Iteration 2393, time = 1.53s, wps = 100477, train loss = 4.6810
Iteration 2413, time = 1.52s, wps = 101281, train loss = 4.6758
Iteration 2433, time = 1.51s, wps = 101675, train loss = 4.8018
Iteration 2453, time = 1.53s, wps = 100583, train loss = 4.6672
Iteration 2473, time = 1.53s, wps = 100633, train loss = 4.6775
Iteration 2493, time = 1.54s, wps = 99833, train loss = 4.7086
Iteration 2513, time = 1.54s, wps = 99923, train loss = 4.7137
Iteration 2533, time = 1.52s, wps = 100740, train loss = 4.6964
Iteration 2553, time = 1.52s, wps = 101011, train loss = 4.6808
Iteration 2573, time = 1.53s, wps = 100300, train loss = 4.6985
Iteration 2593, time = 1.53s, wps = 100686, train loss = 4.6510
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00005-of-00100
Finished processing!
Iteration 2613, time = 3.15s, wps = 48770, train loss = 4.6342
Iteration 2633, time = 1.53s, wps = 100180, train loss = 4.6782
Iteration 2653, time = 1.53s, wps = 100354, train loss = 4.7009
Iteration 2673, time = 1.53s, wps = 100066, train loss = 4.7926
Iteration 2693, time = 1.54s, wps = 100058, train loss = 4.6356
Iteration 2713, time = 1.53s, wps = 100143, train loss = 4.6168
Iteration 2733, time = 1.53s, wps = 100367, train loss = 4.6509
Iteration 2753, time = 1.53s, wps = 100594, train loss = 4.6966
Iteration 2773, time = 1.52s, wps = 100858, train loss = 4.6538
Iteration 2793, time = 1.54s, wps = 99914, train loss = 4.6758
Iteration 2813, time = 1.53s, wps = 100437, train loss = 4.6381
Iteration 2833, time = 1.52s, wps = 101014, train loss = 4.6426
Iteration 2853, time = 1.54s, wps = 99734, train loss = 4.6120
Iteration 2873, time = 1.54s, wps = 99987, train loss = 4.6720
Iteration 2893, time = 1.53s, wps = 100698, train loss = 4.6239
Iteration 2913, time = 1.53s, wps = 100632, train loss = 4.6368
Iteration 2933, time = 1.53s, wps = 100209, train loss = 4.6397
Iteration 2953, time = 1.53s, wps = 100615, train loss = 4.6690
Iteration 2973, time = 1.53s, wps = 100496, train loss = 4.5775
Iteration 2993, time = 1.54s, wps = 100053, train loss = 4.6019
Iteration 3013, time = 1.53s, wps = 100145, train loss = 4.6479
Iteration 3033, time = 1.54s, wps = 99948, train loss = 4.5360
Iteration 3053, time = 1.54s, wps = 99828, train loss = 4.6050
Iteration 3073, time = 1.53s, wps = 100264, train loss = 4.6817
Iteration 3093, time = 1.53s, wps = 100365, train loss = 4.6326
Iteration 3113, time = 1.55s, wps = 99089, train loss = 4.6071
Iteration 3133, time = 1.53s, wps = 100229, train loss = 4.5960
Iteration 3153, time = 1.52s, wps = 100907, train loss = 4.5800
Iteration 3173, time = 1.53s, wps = 100121, train loss = 4.5580
Iteration 3193, time = 1.53s, wps = 100268, train loss = 4.5926
Iteration 3213, time = 1.53s, wps = 100448, train loss = 4.5462
Iteration 3233, time = 1.53s, wps = 100128, train loss = 4.6334
Iteration 3253, time = 1.54s, wps = 99841, train loss = 4.5698
Iteration 3273, time = 1.53s, wps = 100107, train loss = 4.5802
Iteration 3293, time = 1.55s, wps = 99171, train loss = 4.5019
Iteration 3313, time = 1.53s, wps = 100244, train loss = 4.5722
Iteration 3333, time = 1.52s, wps = 100735, train loss = 4.6002
Iteration 3353, time = 1.53s, wps = 100174, train loss = 4.5830
Iteration 3373, time = 1.54s, wps = 100025, train loss = 4.5220
Iteration 3393, time = 1.53s, wps = 100071, train loss = 4.4945
Iteration 3413, time = 1.54s, wps = 99600, train loss = 4.5976
Iteration 3433, time = 1.53s, wps = 100099, train loss = 4.5882
Iteration 3453, time = 1.52s, wps = 100748, train loss = 4.5462
Iteration 3473, time = 1.54s, wps = 99994, train loss = 4.5364
Iteration 3493, time = 1.53s, wps = 100509, train loss = 4.5697
Iteration 3513, time = 1.53s, wps = 100313, train loss = 4.4601
Iteration 3533, time = 1.53s, wps = 100276, train loss = 4.5213
Iteration 3553, time = 1.54s, wps = 99645, train loss = 4.4855
Iteration 3573, time = 1.54s, wps = 99777, train loss = 4.5976
Iteration 3593, time = 1.54s, wps = 99612, train loss = 4.5202
Iteration 3613, time = 1.55s, wps = 99133, train loss = 4.5394
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m12.149s
user    20m46.644s
sys     4m44.110s
root@e0a73ff92515:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output
WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
*****HYPER PARAMETERS*****
{'num_sampled': 8192, 'emb_size': 512, 'num_shards': 8, 'num_delayed_steps': 150, 'keep_prob': 0.9, 'max_grad_norm': 10.0, 'num_layers': 1, 'num_steps': 20, 'learning_rate': 0.2, 'batch_size': 128, 'optimizer': 0, 'run_profiler': False, 'state_size': 2048, 'projected_size': 512, 'num_gpus': 2, 'vocab_size': 793470, 'do_summaries': False, 'max_time': 180, 'average_params': True}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1591167216.437351
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 model/global_step:0 () model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0 model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0 model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0 model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_b/Adagrad:0 (793470,) /gpu:0 model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0 model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0 TRAINABLE VARIABLES model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 LOCAL VARIABLES model/model/state_0_0:0 (128, 2560) /gpu:0 model/model_1/state_1_0:0 (128, 2560) /gpu:1 WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession 2020-06-03 06:53:36.847326: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2899895000 Hz 2020-06-03 06:53:36.855116: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x8adc030 executing computations on platform Host. 
Devices: 2020-06-03 06:53:36.855158: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): , 2020-06-03 06:53:37.296207: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-03 06:53:37.311913: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-03 06:53:37.319091: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-03 06:53:37.320015: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x8adb470 executing computations on platform CUDA. Devices: 2020-06-03 06:53:37.320034: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): TITAN RTX, Compute Capability 7.5 2020-06-03 06:53:37.320041: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (1): TITAN RTX, Compute Capability 7.5 2020-06-03 06:53:37.320047: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-06-03 06:53:37.320054: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-06-03 06:53:37.321081: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:01:00.0 totalMemory: 23.65GiB freeMemory: 23.22GiB 2020-06-03 06:53:37.321111: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:21:00.0 totalMemory: 23.65GiB freeMemory: 23.48GiB 2020-06-03 06:53:37.321135: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:4a:00.0 totalMemory: 10.76GiB freeMemory: 10.60GiB 2020-06-03 06:53:37.321157: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:4b:00.0 totalMemory: 10.76GiB freeMemory: 10.60GiB 2020-06-03 06:53:37.321306: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3 2020-06-03 06:53:38.108309: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-03 06:53:38.108348: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0 1 2 3 2020-06-03 06:53:38.108353: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N N N N 2020-06-03 06:53:38.108356: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1: N N N N 2020-06-03 06:53:38.108363: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2: N N N N 2020-06-03 06:53:38.108367: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3: N N N N 2020-06-03 06:53:38.108504: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22500 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5) 2020-06-03 06:53:38.108997: I 
tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22757 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5) 2020-06-03 06:53:38.109296: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10224 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4a:00.0, compute capability: 7.5) 2020-06-03 06:53:38.109476: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10224 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5) WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file APIs to check for files with this prefix. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file utilities to get mtimes. Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00094-of-00100 Finished processing! 2020-06-03 06:53:47.640481: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally Iteration 3614, time = 5.47s, wps = 935, train loss = 4.8678 Iteration 3615, time = 3.49s, wps = 1466, train loss = 4.4840 Iteration 3616, time = 0.07s, wps = 75063, train loss = 4.6047 Iteration 3617, time = 0.07s, wps = 78409, train loss = 4.5689 Iteration 3618, time = 0.06s, wps = 83163, train loss = 4.5105 Iteration 3619, time = 0.06s, wps = 85031, train loss = 4.5097 Iteration 3620, time = 0.06s, wps = 88020, train loss = 4.5858 Iteration 3621, time = 0.06s, wps = 89306, train loss = 4.5027 Iteration 3622, time = 0.06s, wps = 90449, train loss = 4.5509 Iteration 3633, time = 0.63s, wps = 88722, train loss = 4.5760 Iteration 3653, time = 1.16s, wps = 88217, train loss = 4.5131 Iteration 3673, time = 1.16s, wps = 87923, train loss = 4.5684 Iteration 3693, time = 1.15s, wps = 88700, train loss = 4.5560 Iteration 3713, time = 1.15s, wps = 88766, train loss = 4.5350 Iteration 3733, time = 1.15s, wps = 88702, train loss = 4.5231 Iteration 3753, time = 1.16s, wps = 88509, train loss = 4.4913 Iteration 3773, time = 1.16s, wps = 88422, train loss = 4.5273 Iteration 3793, time = 1.16s, wps = 88314, train loss = 4.5527 Iteration 3813, time = 1.16s, wps = 88110, train loss = 4.4789 Iteration 3833, time = 1.17s, wps = 87260, train loss = 4.4866 Iteration 3853, time = 1.16s, wps = 88054, train loss = 4.5162 Iteration 3873, time = 1.16s, wps = 88556, train loss = 4.5775 Iteration 3893, time = 1.15s, wps = 88806, train loss = 4.5192 Iteration 3913, time = 1.16s, wps = 88576, train loss = 4.5133 Iteration 3933, time = 1.16s, wps = 88300, train loss = 4.5000 Iteration 3953, time = 1.15s, wps = 88874, train loss = 4.5299 Iteration 3973, time = 1.16s, wps = 88374, train loss = 4.5199 Iteration 3993, time = 1.16s, wps = 88139, train loss = 4.4787 Iteration 4013, time = 1.16s, wps = 88196, train loss = 
4.6076 Iteration 4033, time = 1.16s, wps = 88574, train loss = 4.5244 Iteration 4053, time = 1.16s, wps = 88224, train loss = 4.4642 Iteration 4073, time = 1.15s, wps = 89273, train loss = 4.4720 Iteration 4093, time = 1.17s, wps = 87229, train loss = 4.5265 Iteration 4113, time = 1.16s, wps = 88469, train loss = 4.5173 Iteration 4133, time = 1.17s, wps = 87806, train loss = 4.3882 Iteration 4153, time = 1.16s, wps = 88582, train loss = 4.5065 Iteration 4173, time = 1.16s, wps = 87998, train loss = 4.5358 Iteration 4193, time = 1.17s, wps = 87867, train loss = 4.4271 Iteration 4213, time = 1.16s, wps = 88032, train loss = 4.5011 Iteration 4233, time = 1.17s, wps = 87752, train loss = 4.5219 Iteration 4253, time = 1.17s, wps = 87568, train loss = 4.4759 Iteration 4273, time = 1.15s, wps = 88722, train loss = 4.5317 Iteration 4293, time = 1.16s, wps = 88043, train loss = 4.5378 Iteration 4313, time = 1.15s, wps = 88794, train loss = 4.4819 Iteration 4333, time = 1.15s, wps = 88788, train loss = 4.5186 Iteration 4353, time = 1.16s, wps = 88227, train loss = 4.5165 Iteration 4373, time = 1.17s, wps = 87836, train loss = 4.4763 Iteration 4393, time = 1.16s, wps = 87899, train loss = 4.4434 Iteration 4413, time = 1.17s, wps = 87656, train loss = 4.5187 Iteration 4433, time = 1.16s, wps = 88547, train loss = 4.4450 Iteration 4453, time = 1.17s, wps = 87777, train loss = 4.5595 Iteration 4473, time = 1.17s, wps = 87629, train loss = 4.5245 Iteration 4493, time = 1.16s, wps = 88172, train loss = 4.4669 Iteration 4513, time = 1.16s, wps = 88411, train loss = 4.4811 Iteration 4533, time = 1.16s, wps = 88032, train loss = 4.5250 Iteration 4553, time = 1.16s, wps = 88052, train loss = 4.4526 Iteration 4573, time = 1.17s, wps = 87701, train loss = 4.4755 Iteration 4593, time = 1.17s, wps = 87329, train loss = 4.4956 Iteration 4613, time = 1.16s, wps = 88452, train loss = 4.4436 Iteration 4633, time = 1.16s, wps = 88027, train loss = 4.5428 Iteration 4653, time = 1.15s, wps = 88860, train loss = 4.4971 Iteration 4673, time = 1.17s, wps = 87250, train loss = 4.4217 Iteration 4693, time = 1.17s, wps = 87550, train loss = 4.4593 Iteration 4713, time = 1.16s, wps = 88201, train loss = 4.4860 Iteration 4733, time = 1.17s, wps = 87797, train loss = 4.4288 Iteration 4753, time = 1.17s, wps = 87346, train loss = 4.4246 Iteration 4773, time = 1.17s, wps = 87391, train loss = 4.4192 Iteration 4793, time = 1.18s, wps = 86889, train loss = 4.3890 Iteration 4813, time = 1.16s, wps = 88176, train loss = 4.3092 Iteration 4833, time = 1.17s, wps = 87199, train loss = 4.3647 Iteration 4853, time = 1.17s, wps = 87401, train loss = 4.4560 Iteration 4873, time = 1.17s, wps = 87488, train loss = 4.4267 Iteration 4893, time = 1.17s, wps = 87483, train loss = 4.4115 Iteration 4913, time = 1.18s, wps = 87093, train loss = 4.4516 Iteration 4933, time = 1.17s, wps = 87357, train loss = 4.4300 Iteration 4953, time = 1.18s, wps = 86850, train loss = 4.4670 Iteration 4973, time = 1.17s, wps = 87499, train loss = 4.4200 Iteration 4993, time = 1.17s, wps = 87418, train loss = 4.4305 Iteration 5013, time = 1.17s, wps = 87650, train loss = 4.3907 Iteration 5033, time = 1.17s, wps = 87241, train loss = 4.3958 Iteration 5053, time = 1.16s, wps = 88088, train loss = 4.3440 Iteration 5073, time = 1.18s, wps = 86440, train loss = 4.4686 Iteration 5093, time = 1.18s, wps = 87124, train loss = 4.4386 Iteration 5113, time = 1.18s, wps = 87104, train loss = 4.4603 Iteration 5133, time = 1.16s, wps = 88003, train loss = 4.4799 Iteration 5153, 
time = 1.17s, wps = 87825, train loss = 4.3647 Iteration 5173, time = 1.18s, wps = 86897, train loss = 4.3633 Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00009-of-00100 Finished processing! Iteration 5193, time = 2.81s, wps = 36469, train loss = 4.3928 Iteration 5213, time = 1.17s, wps = 87345, train loss = 4.4795 Iteration 5233, time = 1.17s, wps = 87474, train loss = 4.4817 Iteration 5253, time = 1.16s, wps = 88273, train loss = 4.3708 Iteration 5273, time = 1.17s, wps = 87383, train loss = 4.3196 Iteration 5293, time = 1.18s, wps = 86743, train loss = 4.4190 Iteration 5313, time = 1.16s, wps = 88039, train loss = 4.4681 Iteration 5333, time = 1.18s, wps = 86768, train loss = 4.3777 Iteration 5353, time = 1.17s, wps = 87366, train loss = 4.3040 Iteration 5373, time = 1.18s, wps = 86793, train loss = 4.4552 Iteration 5393, time = 1.19s, wps = 85882, train loss = 4.4708 Iteration 5413, time = 1.18s, wps = 86649, train loss = 4.3482 Iteration 5433, time = 1.16s, wps = 88024, train loss = 4.3637 Iteration 5453, time = 1.16s, wps = 87912, train loss = 4.4305 Iteration 5473, time = 1.17s, wps = 87490, train loss = 4.3914 Iteration 5493, time = 1.17s, wps = 87154, train loss = 4.4440 Iteration 5513, time = 1.18s, wps = 86987, train loss = 4.3718 Iteration 5533, time = 1.18s, wps = 87064, train loss = 4.3400 Iteration 5553, time = 1.17s, wps = 87673, train loss = 4.3648 Iteration 5573, time = 1.18s, wps = 87066, train loss = 4.3788 Iteration 5593, time = 1.17s, wps = 87502, train loss = 4.3119 Iteration 5613, time = 1.17s, wps = 87379, train loss = 4.3556 Iteration 5633, time = 1.17s, wps = 87463, train loss = 4.3470 Iteration 5653, time = 1.17s, wps = 87346, train loss = 4.3826 Iteration 5673, time = 1.17s, wps = 87150, train loss = 4.3451 Iteration 5693, time = 1.18s, wps = 86924, train loss = 4.3694 Iteration 5713, time = 1.17s, wps = 87274, train loss = 4.4547 Iteration 5733, time = 1.19s, wps = 85849, train loss = 4.3178 Iteration 5753, time = 1.18s, wps = 86502, train loss = 4.4738 Iteration 5773, time = 1.18s, wps = 86847, train loss = 4.4295 Iteration 5793, time = 1.18s, wps = 86598, train loss = 4.3353 Iteration 5813, time = 1.18s, wps = 86468, train loss = 4.4448 Iteration 5833, time = 1.19s, wps = 86301, train loss = 4.3416 Iteration 5853, time = 1.19s, wps = 86331, train loss = 4.4121 Iteration 5873, time = 1.19s, wps = 86405, train loss = 4.4255 Iteration 5893, time = 1.18s, wps = 86677, train loss = 4.3251 Iteration 5913, time = 1.18s, wps = 87032, train loss = 4.3185 Iteration 5933, time = 1.19s, wps = 86239, train loss = 4.3007 Iteration 5953, time = 1.18s, wps = 86529, train loss = 4.3834 Iteration 5973, time = 1.20s, wps = 85259, train loss = 4.3080 Iteration 5993, time = 1.19s, wps = 85879, train loss = 4.3320 Iteration 6013, time = 1.19s, wps = 86019, train loss = 4.3887 Iteration 6033, time = 1.19s, wps = 86236, train loss = 4.2480 Iteration 6053, time = 1.20s, wps = 84991, train loss = 4.3835 Iteration 6073, time = 1.20s, wps = 85094, train loss = 4.4241 Iteration 6093, time = 1.22s, wps = 83833, train loss = 4.4051 Iteration 6113, time = 1.23s, wps = 83457, train loss = 4.3515 Iteration 6133, time = 1.20s, wps = 85364, train loss = 4.3845 Iteration 6153, time = 1.21s, wps = 84541, train loss = 4.3476 Iteration 6173, time = 1.21s, wps = 84526, train loss = 4.3430 Iteration 6193, time = 1.21s, wps = 84812, train loss = 4.3007 Iteration 6213, time = 1.21s, wps = 84692, train loss = 4.3563 
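The steady-state reports in this --num_gpus=2 run hover around 84-88k words per second, consistent with each iteration consuming batch_size x num_steps tokens per GPU: 128 x 20 x 2 tokens per iteration, 20 iterations per report window, gives 102,400 / 1.16 s, roughly 88,000 wps. A small helper for averaging these reports out of a saved log (hypothetical parsing code assuming the exact line format above, not part of the example):

    import re

    # Matches: "Iteration 6213, time = 1.21s, wps = 84692, train loss = 4.3563"
    REPORT = re.compile(
        r"Iteration (\d+), time = ([\d.]+)s, wps = (\d+), train loss = ([\d.]+)")

    def mean_wps(log_text, min_wps=10000):
        """Average reported wps, skipping warm-up and shard-switch windows."""
        samples = [int(m.group(3)) for m in REPORT.finditer(log_text)]
        steady = [s for s in samples if s >= min_wps]
        return sum(steady) / len(steady) if steady else 0.0

Fed the text of this run it lands near 88k wps, against roughly 100k for the previous run and, further below, roughly 54k for the single-GPU run.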
Iteration 6233, time = 1.22s, wps = 84266, train loss = 4.2622 Iteration 6253, time = 1.23s, wps = 83250, train loss = 4.3158 Iteration 6273, time = 1.22s, wps = 83756, train loss = 4.3634 Iteration 6293, time = 1.23s, wps = 83176, train loss = 4.2666 Iteration 6313, time = 1.22s, wps = 84096, train loss = 4.2942 Iteration 6333, time = 1.23s, wps = 83271, train loss = 4.2959 Iteration 6353, time = 1.21s, wps = 84642, train loss = 4.3734 Iteration 6373, time = 1.22s, wps = 83900, train loss = 4.3770 Iteration 6393, time = 1.23s, wps = 83286, train loss = 4.2528 /usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened. warnings.warn("Attempting to use a closed FileWriter. " real 3m10.429s user 16m32.994s sys 4m13.945s root@e0a73ff92515:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=1 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0. For more information, please see: * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md * https://github.com/tensorflow/addons If you depend on functionality not listed there, please file an issue. *****HYPER PARAMETERS***** {'num_delayed_steps': 150, 'optimizer': 0, 'num_layers': 1, 'max_time': 180, 'vocab_size': 793470, 'emb_size': 512, 'keep_prob': 0.9, 'batch_size': 128, 'max_grad_norm': 10.0, 'run_profiler': False, 'num_gpus': 1, 'learning_rate': 0.2, 'average_params': True, 'projected_size': 512, 'num_steps': 20, 'num_shards': 8, 'num_sampled': 8192, 'do_summaries': False, 'state_size': 2048} ************************** WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version. Instructions for updating: Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating: Use tf.cast instead. Current time: 1591168533.8322992 ALL VARIABLES WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02. Instructions for updating: Please use tf.global_variables instead. model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 model/global_step:0 () model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0 model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0 model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0 model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_b/Adagrad:0 (793470,) /gpu:0 model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0 model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0 TRAINABLE VARIABLES model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 LOCAL VARIABLES model/model/state_0_0:0 (128, 2560) /gpu:0 WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. 
Instructions for updating: Please switch to tf.train.MonitoredTrainingSession 2020-06-03 07:15:34.034349: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2899895000 Hz 2020-06-03 07:15:34.041951: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x6c11bf0 executing computations on platform Host. Devices: 2020-06-03 07:15:34.041994: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): , 2020-06-03 07:15:34.482687: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-03 07:15:34.487972: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-03 07:15:34.494791: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-03 07:15:34.495713: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x6c11610 executing computations on platform CUDA. Devices: 2020-06-03 07:15:34.495757: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): TITAN RTX, Compute Capability 7.5 2020-06-03 07:15:34.495764: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (1): TITAN RTX, Compute Capability 7.5 2020-06-03 07:15:34.495772: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-06-03 07:15:34.495779: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-06-03 07:15:34.496944: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:01:00.0 totalMemory: 23.65GiB freeMemory: 23.22GiB 2020-06-03 07:15:34.496974: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:21:00.0 totalMemory: 23.65GiB freeMemory: 23.48GiB 2020-06-03 07:15:34.496996: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:4a:00.0 totalMemory: 10.76GiB freeMemory: 10.60GiB 2020-06-03 07:15:34.497018: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:4b:00.0 totalMemory: 10.76GiB freeMemory: 10.60GiB 2020-06-03 07:15:34.497155: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3 2020-06-03 07:15:35.278226: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-03 07:15:35.278266: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0 1 2 3 2020-06-03 07:15:35.278271: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N N N N 2020-06-03 07:15:35.278274: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1: N N N N 2020-06-03 07:15:35.278280: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2: N N N N 2020-06-03 07:15:35.278285: I 
tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3: N N N N 2020-06-03 07:15:35.278419: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22500 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5) 2020-06-03 07:15:35.278739: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22757 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5) 2020-06-03 07:15:35.278886: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10224 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4a:00.0, compute capability: 7.5) 2020-06-03 07:15:35.279188: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10224 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5) WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file APIs to check for files with this prefix. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file utilities to get mtimes. Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00091-of-00100 Finished processing! 
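Because --logdir=./logs is reused across runs, the Supervisor restores the latest checkpoint before training: this --num_gpus=1 run picks up at Iteration 6394 (first report below), right after the 6393 reached by the previous run, which is also what the checkpoint_exists/get_checkpoint_mtimes warnings refer to. To confirm what was restored, something like the following works under TF 1.13 (a sketch; adjust the path if the example writes its checkpoints to a subdirectory of --logdir):

    import tensorflow as tf

    # Locate and open the checkpoint the Supervisor would restore.
    ckpt = tf.train.latest_checkpoint("./logs")
    reader = tf.train.NewCheckpointReader(ckpt)
    print(ckpt, "global_step =", reader.get_tensor("model/global_step"))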
2020-06-03 07:15:42.298771: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally Iteration 6394, time = 3.57s, wps = 717, train loss = 4.6328 Iteration 6395, time = 1.76s, wps = 1456, train loss = 4.3652 Iteration 6396, time = 0.06s, wps = 45477, train loss = 4.4431 Iteration 6397, time = 0.06s, wps = 45741, train loss = 4.2791 Iteration 6398, time = 0.05s, wps = 51057, train loss = 4.3039 Iteration 6399, time = 0.04s, wps = 59440, train loss = 4.3749 Iteration 6400, time = 0.05s, wps = 53158, train loss = 4.3389 Iteration 6401, time = 0.05s, wps = 52930, train loss = 4.4150 Iteration 6402, time = 0.05s, wps = 55737, train loss = 4.3396 Iteration 6413, time = 0.51s, wps = 55659, train loss = 4.3267 Iteration 6433, time = 0.92s, wps = 55577, train loss = 4.4290 Iteration 6453, time = 0.94s, wps = 54545, train loss = 4.2598 Iteration 6473, time = 0.93s, wps = 54857, train loss = 4.3630 Iteration 6493, time = 0.92s, wps = 55352, train loss = 4.2277 Iteration 6513, time = 0.94s, wps = 54550, train loss = 4.4559 Iteration 6533, time = 0.92s, wps = 55376, train loss = 4.4560 Iteration 6553, time = 0.95s, wps = 54168, train loss = 4.3148 Iteration 6573, time = 0.93s, wps = 55340, train loss = 4.3937 Iteration 6593, time = 0.95s, wps = 53818, train loss = 4.3581 Iteration 6613, time = 0.94s, wps = 54569, train loss = 4.3764 Iteration 6633, time = 0.92s, wps = 55684, train loss = 4.3135 Iteration 6653, time = 0.94s, wps = 54715, train loss = 4.3964 Iteration 6673, time = 0.94s, wps = 54751, train loss = 4.4327 Iteration 6693, time = 0.93s, wps = 55075, train loss = 4.3209 Iteration 6713, time = 0.94s, wps = 54704, train loss = 4.3231 Iteration 6733, time = 0.94s, wps = 54524, train loss = 4.2645 Iteration 6753, time = 0.94s, wps = 54638, train loss = 4.2926 Iteration 6773, time = 0.94s, wps = 54663, train loss = 4.3462 Iteration 6793, time = 0.93s, wps = 54821, train loss = 4.3207 Iteration 6813, time = 0.94s, wps = 54577, train loss = 4.3677 Iteration 6833, time = 0.93s, wps = 55094, train loss = 4.3042 Iteration 6853, time = 0.93s, wps = 54775, train loss = 4.3983 Iteration 6873, time = 0.95s, wps = 53803, train loss = 4.2450 Iteration 6893, time = 0.93s, wps = 54817, train loss = 4.3503 Iteration 6913, time = 0.95s, wps = 54001, train loss = 4.3940 Iteration 6933, time = 0.94s, wps = 54621, train loss = 4.2624 Iteration 6953, time = 0.94s, wps = 54508, train loss = 4.4234 Iteration 6973, time = 0.94s, wps = 54483, train loss = 4.3869 Iteration 6993, time = 0.91s, wps = 56523, train loss = 4.4242 Iteration 7013, time = 0.95s, wps = 53779, train loss = 4.3616 Iteration 7033, time = 0.93s, wps = 55234, train loss = 4.3318 Iteration 7053, time = 0.94s, wps = 54305, train loss = 4.3325 Iteration 7073, time = 0.94s, wps = 54339, train loss = 4.2881 Iteration 7093, time = 0.94s, wps = 54402, train loss = 4.3102 Iteration 7113, time = 0.93s, wps = 54808, train loss = 4.4002 Iteration 7133, time = 0.93s, wps = 55153, train loss = 4.3108 Iteration 7153, time = 0.94s, wps = 54514, train loss = 4.3960 Iteration 7173, time = 0.94s, wps = 54705, train loss = 4.2713 Iteration 7193, time = 0.94s, wps = 54404, train loss = 4.4404 Iteration 7213, time = 0.92s, wps = 55414, train loss = 4.4967 Iteration 7233, time = 0.95s, wps = 54007, train loss = 4.3285 Iteration 7253, time = 0.93s, wps = 55084, train loss = 4.5062 Iteration 7273, time = 0.94s, wps = 54250, train loss = 4.3463 Iteration 7293, time = 0.94s, wps = 54663, train loss = 4.2888 Iteration 7313, time = 
0.95s, wps = 54173, train loss = 4.1930 Iteration 7333, time = 0.94s, wps = 54444, train loss = 4.3715 Iteration 7353, time = 0.94s, wps = 54318, train loss = 4.2308 Iteration 7373, time = 0.94s, wps = 54509, train loss = 4.4212 Iteration 7393, time = 0.93s, wps = 54946, train loss = 4.3454 Iteration 7413, time = 0.94s, wps = 54450, train loss = 4.3645 Iteration 7433, time = 0.92s, wps = 55472, train loss = 4.3359 Iteration 7453, time = 0.95s, wps = 53829, train loss = 4.3780 Iteration 7473, time = 0.94s, wps = 54545, train loss = 4.4879 Iteration 7493, time = 0.94s, wps = 54392, train loss = 4.4763 Iteration 7513, time = 0.94s, wps = 54279, train loss = 4.3989 Iteration 7533, time = 0.96s, wps = 53572, train loss = 4.4138 Iteration 7553, time = 0.95s, wps = 54161, train loss = 4.4474 Iteration 7573, time = 0.94s, wps = 54383, train loss = 4.3983 Iteration 7593, time = 0.95s, wps = 54173, train loss = 4.3881 Iteration 7613, time = 0.94s, wps = 54345, train loss = 4.3879 Iteration 7633, time = 0.94s, wps = 54291, train loss = 4.3441 Iteration 7653, time = 0.93s, wps = 54866, train loss = 4.3570 Iteration 7673, time = 0.94s, wps = 54255, train loss = 4.2808 Iteration 7693, time = 0.95s, wps = 54044, train loss = 4.3711 Iteration 7713, time = 0.94s, wps = 54337, train loss = 4.3886 Iteration 7733, time = 0.96s, wps = 53363, train loss = 4.2786 Iteration 7753, time = 0.95s, wps = 53862, train loss = 4.3949 Iteration 7773, time = 0.94s, wps = 54391, train loss = 4.2481 Iteration 7793, time = 0.95s, wps = 53724, train loss = 4.2046 Iteration 7813, time = 0.94s, wps = 54274, train loss = 4.3458 Iteration 7833, time = 0.94s, wps = 54480, train loss = 4.2544 Iteration 7853, time = 0.95s, wps = 54097, train loss = 4.3716 Iteration 7873, time = 0.94s, wps = 54293, train loss = 4.3627 Iteration 7893, time = 0.95s, wps = 53641, train loss = 4.3434 Iteration 7913, time = 0.94s, wps = 54312, train loss = 4.3352 Iteration 7933, time = 0.96s, wps = 53567, train loss = 4.2931 Iteration 7953, time = 0.94s, wps = 54717, train loss = 4.2763 Iteration 7973, time = 0.95s, wps = 54027, train loss = 4.3028 Iteration 7993, time = 0.95s, wps = 53709, train loss = 4.2917 Iteration 8013, time = 0.95s, wps = 54153, train loss = 4.3100 Iteration 8033, time = 0.96s, wps = 53575, train loss = 4.2656 Iteration 8053, time = 0.95s, wps = 53625, train loss = 4.3135 Iteration 8073, time = 0.93s, wps = 54940, train loss = 4.3616 Iteration 8093, time = 0.94s, wps = 54188, train loss = 4.4242 Iteration 8113, time = 0.96s, wps = 53498, train loss = 4.3343 Iteration 8133, time = 0.94s, wps = 54205, train loss = 4.2469 Iteration 8153, time = 0.95s, wps = 54082, train loss = 4.3439 Iteration 8173, time = 0.95s, wps = 53714, train loss = 4.3159 Iteration 8193, time = 0.94s, wps = 54330, train loss = 4.2124 Iteration 8213, time = 0.94s, wps = 54392, train loss = 4.3030 Iteration 8233, time = 0.95s, wps = 53711, train loss = 4.3429 Iteration 8253, time = 0.96s, wps = 53435, train loss = 4.4296 Iteration 8273, time = 0.95s, wps = 53746, train loss = 4.2958 Iteration 8293, time = 0.93s, wps = 54838, train loss = 4.2449 Iteration 8313, time = 0.95s, wps = 53994, train loss = 4.2895 Iteration 8333, time = 0.96s, wps = 53611, train loss = 4.3186 Iteration 8353, time = 0.94s, wps = 54285, train loss = 4.2396 Iteration 8373, time = 0.95s, wps = 53845, train loss = 4.1847 Iteration 8393, time = 0.95s, wps = 53745, train loss = 4.3921 Iteration 8413, time = 0.95s, wps = 54008, train loss = 4.3796 Iteration 8433, time = 0.96s, wps = 53458, train 
loss = 4.2182 Iteration 8453, time = 0.95s, wps = 54082, train loss = 4.2570 Iteration 8473, time = 0.96s, wps = 53380, train loss = 4.2552 Iteration 8493, time = 0.97s, wps = 52691, train loss = 4.3227 Iteration 8513, time = 0.96s, wps = 53284, train loss = 4.2984 Iteration 8533, time = 0.95s, wps = 53722, train loss = 4.3313 Iteration 8553, time = 0.94s, wps = 54295, train loss = 4.4639 Iteration 8573, time = 0.96s, wps = 53212, train loss = 4.2594 Iteration 8593, time = 0.96s, wps = 53489, train loss = 4.4553 Iteration 8613, time = 0.97s, wps = 52857, train loss = 4.2739 Iteration 8633, time = 0.96s, wps = 53158, train loss = 4.3768 Iteration 8653, time = 0.96s, wps = 53219, train loss = 4.2777 Iteration 8673, time = 0.96s, wps = 53413, train loss = 4.3553 Iteration 8693, time = 0.95s, wps = 53643, train loss = 4.3312 Iteration 8713, time = 0.94s, wps = 54271, train loss = 4.3457 Iteration 8733, time = 0.97s, wps = 52936, train loss = 4.2681 Iteration 8753, time = 0.98s, wps = 52285, train loss = 4.3173 Iteration 8773, time = 0.97s, wps = 52666, train loss = 4.2337 Iteration 8793, time = 0.98s, wps = 52052, train loss = 4.3762 Iteration 8813, time = 0.97s, wps = 52546, train loss = 4.2843 Iteration 8833, time = 0.98s, wps = 52360, train loss = 4.3714 Iteration 8853, time = 0.98s, wps = 52375, train loss = 4.2731 Iteration 8873, time = 0.98s, wps = 52066, train loss = 4.1564 Iteration 8893, time = 0.97s, wps = 52742, train loss = 4.1500 Iteration 8913, time = 0.98s, wps = 52026, train loss = 4.2470 Iteration 8933, time = 0.98s, wps = 52021, train loss = 4.2909 Iteration 8953, time = 0.98s, wps = 52422, train loss = 4.3040 Iteration 8973, time = 0.99s, wps = 51642, train loss = 4.3504 Iteration 8993, time = 0.98s, wps = 52466, train loss = 4.3055 Iteration 9013, time = 0.98s, wps = 52070, train loss = 4.3722 Iteration 9033, time = 1.01s, wps = 50879, train loss = 4.3648 Iteration 9053, time = 1.00s, wps = 50995, train loss = 4.3637 Iteration 9073, time = 1.00s, wps = 51127, train loss = 4.2620 Iteration 9093, time = 0.98s, wps = 52154, train loss = 4.3559 Iteration 9113, time = 1.02s, wps = 50324, train loss = 4.1363 Iteration 9133, time = 0.98s, wps = 52297, train loss = 4.2784 Iteration 9153, time = 1.00s, wps = 51284, train loss = 4.2773 Iteration 9173, time = 1.00s, wps = 51326, train loss = 4.3164 Iteration 9193, time = 0.99s, wps = 51602, train loss = 4.2849 Iteration 9213, time = 1.01s, wps = 50726, train loss = 4.2825 Iteration 9233, time = 1.02s, wps = 50301, train loss = 4.3729 Iteration 9253, time = 1.01s, wps = 50939, train loss = 4.2780 Iteration 9273, time = 0.99s, wps = 51482, train loss = 4.2565 Iteration 9293, time = 1.02s, wps = 50047, train loss = 4.1767 Iteration 9313, time = 1.01s, wps = 50908, train loss = 4.2802 Iteration 9333, time = 1.00s, wps = 51265, train loss = 4.2705 Iteration 9353, time = 1.02s, wps = 50175, train loss = 4.2976 Iteration 9373, time = 1.03s, wps = 49735, train loss = 4.2392 Iteration 9393, time = 1.03s, wps = 49621, train loss = 4.2612 Iteration 9413, time = 1.02s, wps = 50434, train loss = 4.2829 Iteration 9433, time = 1.04s, wps = 49167, train loss = 4.3474 Iteration 9453, time = 1.00s, wps = 51312, train loss = 4.3472 Iteration 9473, time = 1.03s, wps = 49647, train loss = 4.2642 Iteration 9493, time = 1.03s, wps = 49620, train loss = 4.2498 Iteration 9513, time = 1.03s, wps = 49915, train loss = 4.2757 Iteration 9533, time = 1.04s, wps = 49397, train loss = 4.3515 Iteration 9553, time = 1.01s, wps = 50891, train loss = 4.2123 Processing 
file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00012-of-00100 Finished processing! Iteration 9573, time = 2.65s, wps = 19355, train loss = 4.1311 Iteration 9593, time = 1.05s, wps = 48842, train loss = 4.3201 Iteration 9613, time = 1.06s, wps = 48438, train loss = 4.3725 Iteration 9633, time = 1.05s, wps = 48580, train loss = 4.2646 Iteration 9653, time = 1.00s, wps = 51349, train loss = 4.2164 Iteration 9673, time = 1.04s, wps = 49383, train loss = 4.2856 Iteration 9693, time = 1.04s, wps = 49070, train loss = 4.1010 Iteration 9713, time = 1.05s, wps = 48622, train loss = 4.2809 Iteration 9733, time = 1.03s, wps = 49876, train loss = 4.2714 Iteration 9753, time = 1.04s, wps = 49333, train loss = 4.2273 Iteration 9773, time = 1.04s, wps = 49069, train loss = 4.2901 Iteration 9793, time = 1.05s, wps = 48759, train loss = 4.3234 Iteration 9813, time = 1.05s, wps = 48595, train loss = 4.3273 Iteration 9833, time = 1.07s, wps = 48053, train loss = 4.2884 Iteration 9853, time = 1.06s, wps = 48129, train loss = 4.1856 /usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened. warnings.warn("Attempting to use a closed FileWriter. " real 3m8.816s user 9m21.619s sys 2m59.818s root@e0a73ff92515:/workspace/nvidia-examples/big_lstm# cat /etc/os-release NAME="Ubuntu" VERSION="16.04.6 LTS (Xenial Xerus)" ID=ubuntu ID_LIKE=debian PRETTY_NAME="Ubuntu 16.04.6 LTS" VERSION_ID="16.04" HOME_URL="http://www.ubuntu.com/" SUPPORT_URL="http://help.ubuntu.com/" BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/" VERSION_CODENAME=xenial UBUNTU_CODENAME=xenial root@e0a73ff92515:/workspace/nvidia-examples/big_lstm# nvcc -V nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2019 NVIDIA Corporation Built on Fri_Feb__8_19:08:17_PST_2019 Cuda compilation tools, release 10.1, V10.1.105 root@e0a73ff92515:/workspace/nvidia-examples/big_lstm# cd data root@e0a73ff92515:/workspace/nvidia-examples/big_lstm/data# ls 1-billion-word-language-modeling-benchmark-r13output root@e0a73ff92515:/workspace/nvidia-examples/big_lstm/data# cd 1-billion-word-language-modeling-benchmark-r13output root@e0a73ff92515:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# ls 1b_word_vocab.txt heldout-monolingual.tokenized.shuffled README training-monolingual.tokenized.shuffled root@e0a73ff92515:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# cd training-monolingual.tokenized.shuffled root@e0a73ff92515:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# ls news.en-00001-of-00100 news.en-00034-of-00100 news.en-00067-of-00100 news.en-00002-of-00100 news.en-00035-of-00100 news.en-00068-of-00100 news.en-00003-of-00100 news.en-00036-of-00100 news.en-00069-of-00100 news.en-00004-of-00100 news.en-00037-of-00100 news.en-00070-of-00100 news.en-00005-of-00100 news.en-00038-of-00100 news.en-00071-of-00100 news.en-00006-of-00100 news.en-00039-of-00100 news.en-00072-of-00100 news.en-00007-of-00100 news.en-00040-of-00100 news.en-00073-of-00100 news.en-00008-of-00100 news.en-00041-of-00100 news.en-00074-of-00100 news.en-00009-of-00100 news.en-00042-of-00100 news.en-00075-of-00100 news.en-00010-of-00100 news.en-00043-of-00100 news.en-00076-of-00100 news.en-00011-of-00100 
news.en-00044-of-00100 news.en-00077-of-00100 news.en-00012-of-00100 news.en-00045-of-00100 news.en-00078-of-00100 news.en-00013-of-00100 news.en-00046-of-00100 news.en-00079-of-00100 news.en-00014-of-00100 news.en-00047-of-00100 news.en-00080-of-00100 news.en-00015-of-00100 news.en-00048-of-00100 news.en-00081-of-00100 news.en-00016-of-00100 news.en-00049-of-00100 news.en-00082-of-00100 news.en-00017-of-00100 news.en-00050-of-00100 news.en-00083-of-00100 news.en-00018-of-00100 news.en-00051-of-00100 news.en-00084-of-00100 news.en-00019-of-00100 news.en-00052-of-00100 news.en-00085-of-00100 news.en-00020-of-00100 news.en-00053-of-00100 news.en-00086-of-00100 news.en-00021-of-00100 news.en-00054-of-00100 news.en-00087-of-00100 news.en-00022-of-00100 news.en-00055-of-00100 news.en-00088-of-00100 news.en-00023-of-00100 news.en-00056-of-00100 news.en-00089-of-00100 news.en-00024-of-00100 news.en-00057-of-00100 news.en-00090-of-00100 news.en-00025-of-00100 news.en-00058-of-00100 news.en-00091-of-00100 news.en-00026-of-00100 news.en-00059-of-00100 news.en-00092-of-00100 news.en-00027-of-00100 news.en-00060-of-00100 news.en-00093-of-00100 news.en-00028-of-00100 news.en-00061-of-00100 news.en-00094-of-00100 news.en-00029-of-00100 news.en-00062-of-00100 news.en-00095-of-00100 news.en-00030-of-00100 news.en-00063-of-00100 news.en-00096-of-00100 news.en-00031-of-00100 news.en-00064-of-00100 news.en-00097-of-00100 news.en-00032-of-00100 news.en-00065-of-00100 news.en-00098-of-00100 news.en-00033-of-00100 news.en-00066-of-00100 news.en-00099-of-00100 root@e0a73ff92515:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# exit exit [chibi@centos8 ~]$ cat /etc/redhat-release CentOS Linux release 8.1.1911 (Core) [chibi@centos8 ~]$ nvcc -V nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2019 NVIDIA Corporation Built on Wed_Oct_23_19:24:38_PDT_2019 Cuda compilation tools, release 10.2, V10.2.89 [chibi@centos8 ~]$ sensors eth0-pci-4400 Adapter: PCI adapter PHY Temperature: +53.7°C k10temp-pci-00c3 Adapter: PCI adapter Tdie: +40.8°C (high = +70.0°C) Tctl: +40.8°C iwlwifi-virtual-0 Adapter: Virtual device temp1: +41.0°C [chibi@centos8 ~]$ sudo hddtemp /dev/sda [sudo] password for chibi: /dev/sda: Samsung SSD 840 PRO Series: 35°C [chibi@centos8 ~]$ nvidia-smi nvlink -c GPU 0: TITAN RTX (UUID: GPU-7fb51c1d-c1e7-35cc-aad7-66971f05ddb7) GPU 1: TITAN RTX (UUID: GPU-5a71d61e-f130-637a-b33d-4df555b0ed88) GPU 2: GeForce RTX 2080 Ti (UUID: GPU-1ac935c2-557f-282e-14e5-3f749ffd63ac) GPU 3: GeForce RTX 2080 Ti (UUID: GPU-13277ce5-e1e9-0cb1-8cee-6c9e6618e774) [chibi@centos8 ~]$ sensors eth0-pci-4400 Adapter: PCI adapter PHY Temperature: +53.0°C k10temp-pci-00c3 Adapter: PCI adapter Tdie: +39.8°C (high = +70.0°C) Tctl: +39.8°C iwlwifi-virtual-0 Adapter: Virtual device temp1: +41.0°C [chibi@centos8 ~]$ cat /proc/cpuinfo processor : 0 vendor_id : AuthenticAMD cpu family : 23 model : 49 model name : AMD Ryzen Threadripper 3990X 64-Core Processor stepping : 0 microcode : 0x8301025 cpu MHz : 3599.123 cache size : 512 KB physical id : 0 siblings : 128 core id : 0 cpu cores : 64 apicid : 0 initial apicid : 0 fpu : yes fpu_exception : yes cpuid level : 16 wp : yes
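To recap the three timed runs in this section (each stopped by the max_time=180 limit, hence the ~3-minute real times):

    run            steady-state wps   real       user
    previous run   ~100,000           3m12.149s  20m46.644s
    --num_gpus=2   ~88,000            3m10.429s  16m32.994s
    --num_gpus=1   ~54,000            3m8.816s   9m21.619s

Scaling is clearly sub-linear: two GPUs deliver about 1.6x the single-GPU throughput. That matches the device-interconnect matrix printed at startup, which shows "N" for every GPU pair, and the nvidia-smi nvlink -c output above, which lists the four boards but no link status: TensorFlow found no NVLink or peer-to-peer path between any pair, so all gradient exchange crosses host memory over PCIe. Note also that the container's CUDA toolkit (10.1, per nvcc inside the container) differs from the host's 10.2; that is expected, since the container ships its own toolkit and depends only on the host driver.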