[chibi@centos8 ~]$ sudo nvidia-docker run --rm -ti nvcr.io/nvidia/tensorflow:19.04-py3
Unable to find image 'nvcr.io/nvidia/tensorflow:19.04-py3' locally
19.04-py3: Pulling from nvidia/tensorflow
34667c7e4631: Pulling fs layer
[... per-layer pull progress elided ...]
3264ea403ca9: Pull complete
Digest: sha256:aaebc136d5d50937362675c77afd908bd96cded68846f39163050a023c8a9851
Status: Downloaded newer image for nvcr.io/nvidia/tensorflow:19.04-py3

================
== TensorFlow ==
================

NVIDIA Release 19.04 (build 6132408)
TensorFlow Version 1.13.1

Container image Copyright (c) 2019,
NVIDIA CORPORATION. All rights reserved.
Copyright 2017-2019 The TensorFlow Authors. All rights reserved.

Various files include modifications (c) NVIDIA CORPORATION. All rights reserved.
NVIDIA modifications are covered by the license terms that apply to the underlying project or file.

NOTE: MOFED driver for multi-node communication was not detected.
      Multi-node communication performance may be reduced.

NOTE: The SHMEM allocation limit is set to the default of 64MB. This may be
      insufficient for TensorFlow. NVIDIA recommends the use of the following flags:
      nvidia-docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 ...

root@3a240370f11d:/workspace# ls
README.md  docker-examples  nvidia-examples
root@3a240370f11d:/workspace# cd nvidia-examples
root@3a240370f11d:/workspace/nvidia-examples# ls
NCF  OpenSeq2Seq  UNet_Industrial  bert  big_lstm  build_imagenet_data  cnn  gnmt_v2  resnet50v1.5  ssdv1.2  tensorrt
root@3a240370f11d:/workspace/nvidia-examples# cd big_lstm
root@3a240370f11d:/workspace/nvidia-examples/big_lstm# ls
1b_word_vocab.txt  README.md  __init__.py  common.py  data_utils.py  data_utils_test.py  download_1b_words_data.sh  hparams.py  hparams_test.py  language_model.py  language_model_test.py  model_utils.py  run_utils.py  single_lm_train.py  testdata
root@3a240370f11d:/workspace/nvidia-examples/big_lstm# ./download_1b_words_data.sh
Please specify root of dataset directory: data
Success: dataset root dir validated
--2020-06-01 19:03:53--  http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
Resolving www.statmt.org (www.statmt.org)... 129.215.197.184
Connecting to www.statmt.org (www.statmt.org)|129.215.197.184|:80... connected.
HTTP request sent, awaiting response...
200 OK
Length: 1792209805 (1.7G) [application/x-gzip]
Saving to: ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’

1-billion-word-lang 100%[===================>]   1.67G  1.55MB/s    in 18m 42s

2020-06-01 19:22:37 (1.52 MB/s) - ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’ saved [1792209805/1792209805]

1-billion-word-language-modeling-benchmark-r13output/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00024-of-00100
[... extraction of the remaining news.en-000xx-of-00100 training shards elided ...]
1-billion-word-language-modeling-benchmark-r13output/.svn/
[... extraction of .svn metadata (pristine/, entries, format, wc.db) elided ...]
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00015-of-00050
[... extraction of the remaining news.en.heldout-000xx-of-00050 heldout shards elided ...]
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en-00000-of-00100
1-billion-word-language-modeling-benchmark-r13output/README
Success!
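The extraction above should leave roughly 100 training shards (news.en-000xx-of-00100) and 50 heldout shards (news.en.heldout-000xx-of-00050) on disk. A quick sanity check can be run before training; this is a sketch, assuming the `data` root given to the download script above:

```shell
#!/bin/sh
# count_shards DIR PATTERN -> number of files in DIR whose names match PATTERN
count_shards() {
    dir=$1
    pattern=$2
    ls "$dir" | grep -c "$pattern"
}

root=data/1-billion-word-language-modeling-benchmark-r13output
if [ -d "$root" ]; then
    echo "training shards: $(count_shards "$root/training-monolingual.tokenized.shuffled" 'news\.en-.*-of-00100')"
    echo "heldout shards:  $(count_shards "$root/heldout-monolingual.tokenized.shuffled" 'news\.en\.heldout-.*-of-00050')"
fi
```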
One billion words dataset ready at:
data/1-billion-word-language-modeling-benchmark-r13output/
Please pass this dir to single_lm_train.py via the --datadir option.
root@3a240370f11d:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=4 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

*****HYPER PARAMETERS*****
{'keep_prob': 0.9, 'max_time': 180, 'projected_size': 512, 'emb_size': 512, 'do_summaries': False, 'state_size': 2048, 'vocab_size': 793470, 'num_steps': 20, 'learning_rate': 0.2, 'average_params': True, 'batch_size': 128, 'num_gpus': 4, 'num_delayed_steps': 150, 'num_sampled': 8192, 'max_grad_norm': 10.0, 'num_shards': 8, 'run_profiler': False, 'num_layers': 1, 'optimizer': 0}
**************************

WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1591039397.5088053
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
model/model_2/state_2_0:0 (128, 2560) /gpu:2
model/model_3/state_3_0:0 (128, 2560) /gpu:3
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-06-01 19:23:18.172631: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3700135000 Hz
2020-06-01 19:23:18.176601: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xc5efc00 executing computations on platform Host.
Devices: 2020-06-01 19:23:18.176639: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): , 2020-06-01 19:23:18.577990: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-01 19:23:18.606887: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-01 19:23:18.612018: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-01 19:23:18.619558: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0xc49ff60 executing computations on platform CUDA. Devices: 2020-06-01 19:23:18.619595: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): TITAN RTX, Compute Capability 7.5 2020-06-01 19:23:18.619602: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (1): TITAN RTX, Compute Capability 7.5 2020-06-01 19:23:18.619611: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-06-01 19:23:18.619617: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-06-01 19:23:18.620706: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:01:00.0 totalMemory: 23.65GiB freeMemory: 23.22GiB 2020-06-01 19:23:18.620734: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:21:00.0 totalMemory: 23.65GiB freeMemory: 23.48GiB 
2020-06-01 19:23:18.620756: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:4a:00.0 totalMemory: 10.76GiB freeMemory: 10.60GiB 2020-06-01 19:23:18.620777: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:4b:00.0 totalMemory: 10.76GiB freeMemory: 10.60GiB 2020-06-01 19:23:18.620916: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3 2020-06-01 19:23:19.394693: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-01 19:23:19.394734: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0 1 2 3 2020-06-01 19:23:19.394738: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N N N N 2020-06-01 19:23:19.394742: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1: N N N N 2020-06-01 19:23:19.394745: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2: N N N N 2020-06-01 19:23:19.394750: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3: N N N N 2020-06-01 19:23:19.394886: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22500 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5) 2020-06-01 19:23:19.395214: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22757 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5) 2020-06-01 19:23:19.395491: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10224 MB memory) -> physical 
GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4a:00.0, compute capability: 7.5) 2020-06-01 19:23:19.395776: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10224 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5) Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00071-of-00100 Finished processing! 2020-06-01 19:23:41.022914: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally Iteration 1, time = 12.14s, wps = 844, train loss = 12.9700 Iteration 2, time = 8.85s, wps = 1158, train loss = 12.9642 Iteration 3, time = 0.10s, wps = 99562, train loss = 12.8554 Iteration 4, time = 0.10s, wps = 102115, train loss = 11.4194 Iteration 5, time = 0.10s, wps = 105155, train loss = 117.3372 Iteration 6, time = 0.10s, wps = 103912, train loss = 62.4686 Iteration 7, time = 0.10s, wps = 106971, train loss = 18.4020 Iteration 8, time = 0.10s, wps = 106545, train loss = 31.3002 Iteration 9, time = 0.10s, wps = 104590, train loss = 14.9296 Iteration 20, time = 1.07s, wps = 105302, train loss = 14.5501 Iteration 40, time = 1.95s, wps = 105152, train loss = 8.9229 Iteration 60, time = 1.94s, wps = 105451, train loss = 8.7119 Iteration 80, time = 1.96s, wps = 104709, train loss = 7.7369 Iteration 100, time = 1.95s, wps = 105057, train loss = 7.8886 Iteration 120, time = 1.95s, wps = 105271, train loss = 7.3495 Iteration 140, time = 1.96s, wps = 104455, train loss = 6.9955 Iteration 160, time = 1.95s, wps = 105122, train loss = 6.8628 Iteration 180, time = 1.95s, wps = 105052, train loss = 6.5707 Iteration 200, time = 1.95s, wps = 105280, train loss = 6.6146 Iteration 220, time = 1.96s, wps = 104581, train loss = 6.3471 Iteration 240, time = 1.94s, wps = 105430, train loss = 6.1645 Iteration 260, time 
= 1.96s, wps = 104480, train loss = 6.2420 Iteration 280, time = 1.95s, wps = 104938, train loss = 6.0895 Iteration 300, time = 1.95s, wps = 105044, train loss = 6.0087 Iteration 320, time = 1.95s, wps = 104854, train loss = 6.0071 Iteration 340, time = 1.95s, wps = 104955, train loss = 5.9325 Iteration 360, time = 1.94s, wps = 105381, train loss = 5.9051 Iteration 380, time = 1.96s, wps = 104607, train loss = 5.8991 Iteration 400, time = 1.97s, wps = 103875, train loss = 5.9477 Iteration 420, time = 1.94s, wps = 105322, train loss = 5.9076 Iteration 440, time = 1.95s, wps = 105011, train loss = 5.7921 Iteration 460, time = 1.95s, wps = 105041, train loss = 5.7777 Iteration 480, time = 1.95s, wps = 104878, train loss = 5.7608 Iteration 500, time = 1.97s, wps = 104165, train loss = 5.6876 Iteration 520, time = 1.96s, wps = 104605, train loss = 5.7221 Iteration 540, time = 1.95s, wps = 104997, train loss = 5.6887 Iteration 560, time = 1.95s, wps = 105052, train loss = 5.5963 Iteration 580, time = 1.96s, wps = 104293, train loss = 5.6109 Iteration 600, time = 1.96s, wps = 104702, train loss = 5.5595 Iteration 620, time = 1.96s, wps = 104361, train loss = 5.5623 Iteration 640, time = 1.94s, wps = 105378, train loss = 5.5315 Iteration 660, time = 1.95s, wps = 104789, train loss = 5.4422 Iteration 680, time = 1.94s, wps = 105503, train loss = 5.5043 Iteration 700, time = 1.95s, wps = 105230, train loss = 5.4858 Iteration 720, time = 1.96s, wps = 104680, train loss = 5.3905 Iteration 740, time = 1.95s, wps = 104913, train loss = 5.4109 Iteration 760, time = 1.96s, wps = 104509, train loss = 5.4050 Iteration 780, time = 1.95s, wps = 104935, train loss = 5.4890 Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00040-of-00100 Finished processing! 
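The wps figures in the iteration log above can be reconstructed from the run's hyper parameters (batch_size=128, num_steps=20, num_gpus=4). A minimal sanity check, assuming each printed "time" spans the 20 iterations since the previous line (which the iteration-number spacing ..., 80, 100, 120, ... suggests):

```python
# Sanity-check the aggregate words-per-second printed by single_lm_train.py.
# Assumption: each log line's "time" covers the 20 iterations since the
# previous line, as the iteration-number spacing in the log suggests.

def words_per_second(batch_size, num_steps, num_gpus, elapsed_s, iterations=20):
    # Each iteration processes batch_size * num_steps tokens per GPU.
    return batch_size * num_steps * num_gpus * iterations / elapsed_s

# Hyper parameters from the 4-GPU run above; "Iteration 100, time = 1.95s"
# reported wps = 105057, and the reconstruction lands within a fraction
# of a percent of that:
print(round(words_per_second(128, 20, 4, 1.95)))  # 105026
```

The close agreement confirms that the printed wps is an aggregate figure across all GPUs, not a per-GPU rate.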
Iteration 800, time = 3.48s, wps = 58909, train loss = 5.3896 Iteration 820, time = 1.96s, wps = 104395, train loss = 5.3430 Iteration 840, time = 1.96s, wps = 104728, train loss = 5.3304 Iteration 860, time = 1.95s, wps = 105195, train loss = 5.3380 Iteration 880, time = 1.96s, wps = 104457, train loss = 5.3119 Iteration 900, time = 1.96s, wps = 104554, train loss = 5.3242 Iteration 920, time = 1.95s, wps = 104962, train loss = 5.2662 Iteration 940, time = 1.95s, wps = 104830, train loss = 5.2623 Iteration 960, time = 1.95s, wps = 104827, train loss = 5.2340 Iteration 980, time = 1.97s, wps = 104169, train loss = 5.2383 Iteration 1000, time = 1.97s, wps = 104019, train loss = 5.2512 Iteration 1020, time = 1.95s, wps = 105193, train loss = 5.1936 Iteration 1040, time = 1.96s, wps = 104626, train loss = 5.1722 Iteration 1060, time = 1.94s, wps = 105373, train loss = 5.2039 Iteration 1080, time = 1.96s, wps = 104495, train loss = 5.1732 Iteration 1100, time = 1.96s, wps = 104693, train loss = 5.2157 Iteration 1120, time = 1.97s, wps = 104132, train loss = 5.1624 Iteration 1140, time = 1.95s, wps = 104766, train loss = 5.1486 Iteration 1160, time = 1.95s, wps = 105198, train loss = 5.1518 Iteration 1180, time = 1.95s, wps = 104823, train loss = 5.1174 Iteration 1200, time = 1.95s, wps = 104762, train loss = 5.0976 Iteration 1220, time = 1.98s, wps = 103696, train loss = 5.1189 Iteration 1240, time = 1.96s, wps = 104597, train loss = 5.1148 Iteration 1260, time = 1.96s, wps = 104302, train loss = 5.0447 Iteration 1280, time = 1.94s, wps = 105426, train loss = 5.0543 Iteration 1300, time = 1.96s, wps = 104546, train loss = 4.9996 Iteration 1320, time = 1.96s, wps = 104404, train loss = 5.0756 Iteration 1340, time = 1.96s, wps = 104410, train loss = 4.9974 Iteration 1360, time = 1.94s, wps = 105676, train loss = 5.0015 Iteration 1380, time = 1.96s, wps = 104348, train loss = 5.0652 Iteration 1400, time = 1.96s, wps = 104532, train loss = 5.0397 Iteration 1420, time = 
1.96s, wps = 104503, train loss = 4.9260 Iteration 1440, time = 1.96s, wps = 104334, train loss = 5.0037 Iteration 1460, time = 1.96s, wps = 104323, train loss = 4.9556 Iteration 1480, time = 1.96s, wps = 104448, train loss = 4.9099
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real	3m17.018s
user	21m46.554s
sys	2m32.505s
root@3a240370f11d:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=3 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output
WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
*****HYPER PARAMETERS*****
{'run_profiler': False, 'num_gpus': 3, 'num_delayed_steps': 150, 'num_shards': 8, 'batch_size': 128, 'do_summaries': False, 'projected_size': 512, 'vocab_size': 793470, 'learning_rate': 0.2, 'keep_prob': 0.9, 'average_params': True, 'num_sampled': 8192, 'num_layers': 1, 'max_time': 180, 'max_grad_norm': 10.0, 'state_size': 2048, 'emb_size': 512, 'optimizer': 0, 'num_steps': 20}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating: Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version. Instructions for updating: Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. Current time: 1591040584.6742759 ALL VARIABLES WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02. Instructions for updating: Please use tf.global_variables instead. 
model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 model/global_step:0 () model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0 model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0 model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0 model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0 model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0 model/model/softmax_b/Adagrad:0 (793470,) /gpu:0 model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0 model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0 
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0 TRAINABLE VARIABLES model/emb_0:0 (99184, 512) /gpu:0 model/emb_1:0 (99184, 512) /gpu:0 model/emb_2:0 (99184, 512) /gpu:0 model/emb_3:0 (99184, 512) /gpu:0 model/emb_4:0 (99184, 512) /gpu:0 model/emb_5:0 (99184, 512) /gpu:0 model/emb_6:0 (99184, 512) /gpu:0 model/emb_7:0 (99184, 512) /gpu:0 model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0 model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0 model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0 model/softmax_w_0:0 (99184, 512) /gpu:0 model/softmax_w_1:0 (99184, 512) /gpu:0 model/softmax_w_2:0 (99184, 512) /gpu:0 model/softmax_w_3:0 (99184, 512) /gpu:0 model/softmax_w_4:0 (99184, 512) /gpu:0 model/softmax_w_5:0 (99184, 512) /gpu:0 model/softmax_w_6:0 (99184, 512) /gpu:0 model/softmax_w_7:0 (99184, 512) /gpu:0 model/softmax_b:0 (793470,) /gpu:0 LOCAL VARIABLES model/model/state_0_0:0 (128, 2560) /gpu:0 model/model_1/state_1_0:0 (128, 2560) /gpu:1 model/model_2/state_2_0:0 (128, 2560) /gpu:2 WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession 2020-06-01 19:43:05.205626: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3700135000 Hz 2020-06-01 19:43:05.209227: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x8f4e470 executing computations on platform Host. 
Devices: 2020-06-01 19:43:05.209268: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): , 2020-06-01 19:43:05.687430: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-01 19:43:05.693033: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-01 19:43:05.699560: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-01 19:43:05.700701: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x8f4de90 executing computations on platform CUDA. Devices: 2020-06-01 19:43:05.700735: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): TITAN RTX, Compute Capability 7.5 2020-06-01 19:43:05.700742: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (1): TITAN RTX, Compute Capability 7.5 2020-06-01 19:43:05.700748: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-06-01 19:43:05.700754: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5 2020-06-01 19:43:05.701828: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:01:00.0 totalMemory: 23.65GiB freeMemory: 23.22GiB 2020-06-01 19:43:05.701856: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties: name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77 pciBusID: 0000:21:00.0 totalMemory: 23.65GiB freeMemory: 23.48GiB 
2020-06-01 19:43:05.701878: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:4a:00.0 totalMemory: 10.76GiB freeMemory: 10.60GiB 2020-06-01 19:43:05.701900: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties: name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635 pciBusID: 0000:4b:00.0 totalMemory: 10.76GiB freeMemory: 10.60GiB 2020-06-01 19:43:05.702035: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3 2020-06-01 19:43:06.467885: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-01 19:43:06.467922: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0 1 2 3 2020-06-01 19:43:06.467928: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N N N N 2020-06-01 19:43:06.467932: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1: N N N N 2020-06-01 19:43:06.467936: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2: N N N N 2020-06-01 19:43:06.467940: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3: N N N N 2020-06-01 19:43:06.468063: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22499 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5) 2020-06-01 19:43:06.468314: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22757 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5) 2020-06-01 19:43:06.468551: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10224 MB memory) -> physical 
GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4a:00.0, compute capability: 7.5) 2020-06-01 19:43:06.468711: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10224 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5) WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file APIs to check for files with this prefix. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file utilities to get mtimes. Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00034-of-00100 Finished processing! 
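The checkpoint_exists warnings above indicate the 3-GPU run restores the checkpoint the 4-GPU run wrote to ./logs, which is why its iteration counter does not restart from 1. Comparing the two runs' steady-state throughput per GPU is instructive; a sketch using rounded steady-state wps figures read off the logs (~105,000 aggregate for 4 GPUs, ~98,500 for 3):

```python
# Per-GPU throughput comparison between the 4-GPU and 3-GPU runs logged
# above. The aggregate wps values are rounded steady-state figures taken
# from the logs, not exact measurements.

def per_gpu_wps(aggregate_wps, num_gpus):
    return aggregate_wps / num_gpus

four_gpu = per_gpu_wps(105000, 4)   # mixed TITAN RTX + 2080 Ti, 4 GPUs
three_gpu = per_gpu_wps(98500, 3)   # same machine, 3 GPUs
print(round(four_gpu), round(three_gpu))  # 26250 32833
```

Dropping from 4 GPUs to 3 costs far less than 25% of aggregate throughput here; one plausible reading is that the slower RTX 2080 Ti cards bound each synchronous step in the mixed-GPU configuration.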
2020-06-01 19:43:20.107165: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally Iteration 1486, time = 8.82s, wps = 870, train loss = 5.6142 Iteration 1487, time = 5.92s, wps = 1298, train loss = 5.0426 Iteration 1488, time = 0.08s, wps = 91800, train loss = 4.9592 Iteration 1489, time = 0.08s, wps = 94880, train loss = 4.9635 Iteration 1490, time = 0.08s, wps = 92962, train loss = 4.9072 Iteration 1491, time = 0.08s, wps = 100000, train loss = 4.9775 Iteration 1492, time = 0.08s, wps = 99030, train loss = 4.9995 Iteration 1493, time = 0.08s, wps = 96366, train loss = 5.0654 Iteration 1494, time = 0.09s, wps = 89625, train loss = 4.9875 Iteration 1505, time = 0.85s, wps = 99880, train loss = 4.9536 Iteration 1525, time = 1.55s, wps = 99227, train loss = 4.9815 Iteration 1545, time = 1.56s, wps = 98606, train loss = 4.9903 Iteration 1565, time = 1.56s, wps = 98649, train loss = 4.9229 Iteration 1585, time = 1.56s, wps = 98773, train loss = 4.8905 Iteration 1605, time = 1.55s, wps = 99004, train loss = 4.9822 Iteration 1625, time = 1.55s, wps = 99005, train loss = 4.8438 Iteration 1645, time = 1.56s, wps = 98183, train loss = 4.8483 Iteration 1665, time = 1.57s, wps = 97973, train loss = 4.9288 Iteration 1685, time = 1.56s, wps = 98317, train loss = 4.8921 Iteration 1705, time = 1.56s, wps = 98588, train loss = 4.9027 Iteration 1725, time = 1.56s, wps = 98296, train loss = 4.9077 Iteration 1745, time = 1.56s, wps = 98256, train loss = 4.8009 Iteration 1765, time = 1.55s, wps = 99047, train loss = 4.9101 Iteration 1785, time = 1.55s, wps = 98780, train loss = 4.9300 Iteration 1805, time = 1.55s, wps = 98927, train loss = 4.8968 Iteration 1825, time = 1.55s, wps = 98947, train loss = 4.8276 Iteration 1845, time = 1.56s, wps = 98738, train loss = 4.9142 Iteration 1865, time = 1.55s, wps = 99011, train loss = 4.8745 Iteration 1885, time = 1.55s, wps = 98964, train loss = 4.8770 Iteration 1905, time = 1.56s, wps = 
98727, train loss = 4.7965 Iteration 1925, time = 1.56s, wps = 98504, train loss = 4.7863 Iteration 1945, time = 1.55s, wps = 99041, train loss = 4.8014 Iteration 1965, time = 1.56s, wps = 98568, train loss = 4.8355 Iteration 1985, time = 1.56s, wps = 98380, train loss = 4.7691 Iteration 2005, time = 1.56s, wps = 98492, train loss = 4.8036 Iteration 2025, time = 1.56s, wps = 98274, train loss = 4.8450 Iteration 2045, time = 1.56s, wps = 98370, train loss = 4.8087 Iteration 2065, time = 1.55s, wps = 98968, train loss = 4.8128 Iteration 2085, time = 1.55s, wps = 98921, train loss = 4.7498 Iteration 2105, time = 1.55s, wps = 98898, train loss = 4.7869 Iteration 2125, time = 1.57s, wps = 98108, train loss = 4.8110 Iteration 2145, time = 1.55s, wps = 99012, train loss = 4.7893 Iteration 2165, time = 1.57s, wps = 98133, train loss = 4.7709 Iteration 2185, time = 1.56s, wps = 98196, train loss = 4.8284 Iteration 2205, time = 1.55s, wps = 98945, train loss = 4.6565 Iteration 2225, time = 1.56s, wps = 98431, train loss = 4.7483 Iteration 2245, time = 1.56s, wps = 98667, train loss = 4.7677 Iteration 2265, time = 1.55s, wps = 99203, train loss = 4.7426 Iteration 2285, time = 1.55s, wps = 98840, train loss = 4.6954 Iteration 2305, time = 1.56s, wps = 98304, train loss = 4.6895 Iteration 2325, time = 1.57s, wps = 97994, train loss = 4.7506 Iteration 2345, time = 1.55s, wps = 98806, train loss = 4.6804 Iteration 2365, time = 1.55s, wps = 98877, train loss = 4.7234 Iteration 2385, time = 1.56s, wps = 98691, train loss = 4.7549 Iteration 2405, time = 1.57s, wps = 97958, train loss = 4.6554 Iteration 2425, time = 1.55s, wps = 98892, train loss = 4.6501 Iteration 2445, time = 1.55s, wps = 98993, train loss = 4.6947 Iteration 2465, time = 1.56s, wps = 98545, train loss = 4.6424 Iteration 2485, time = 1.56s, wps = 98429, train loss = 4.6909 Iteration 2505, time = 1.56s, wps = 98563, train loss = 4.6614 Iteration 2525, time = 1.57s, wps = 97905, train loss = 4.6033 Processing file: 
./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00016-of-00100 Finished processing! Iteration 2545, time = 3.17s, wps = 48464, train loss = 4.7240 Iteration 2565, time = 1.56s, wps = 98557, train loss = 4.7061 Iteration 2585, time = 1.57s, wps = 98144, train loss = 4.6309 Iteration 2605, time = 1.56s, wps = 98629, train loss = 4.7306 Iteration 2625, time = 1.56s, wps = 98770, train loss = 4.6862 Iteration 2645, time = 1.55s, wps = 99212, train loss = 4.6396 Iteration 2665, time = 1.57s, wps = 98133, train loss = 4.6414 Iteration 2685, time = 1.56s, wps = 98620, train loss = 4.6333 Iteration 2705, time = 1.56s, wps = 98743, train loss = 4.6082 Iteration 2725, time = 1.56s, wps = 98589, train loss = 4.6186 Iteration 2745, time = 1.56s, wps = 98601, train loss = 4.7122 Iteration 2765, time = 1.57s, wps = 97534, train loss = 4.6790 Iteration 2785, time = 1.56s, wps = 98731, train loss = 4.6548 Iteration 2805, time = 1.55s, wps = 98952, train loss = 4.6668 Iteration 2825, time = 1.55s, wps = 99017, train loss = 4.6159 Iteration 2845, time = 1.56s, wps = 98378, train loss = 4.5993 Iteration 2865, time = 1.56s, wps = 98330, train loss = 4.6155 Iteration 2885, time = 1.57s, wps = 97701, train loss = 4.6507 Iteration 2905, time = 1.56s, wps = 98465, train loss = 4.5775 Iteration 2925, time = 1.56s, wps = 98443, train loss = 4.5996 Iteration 2945, time = 1.56s, wps = 98507, train loss = 4.5681 Iteration 2965, time = 1.55s, wps = 98802, train loss = 4.5380 Iteration 2985, time = 1.56s, wps = 98513, train loss = 4.5951 Iteration 3005, time = 1.56s, wps = 98411, train loss = 4.6011 Iteration 3025, time = 1.56s, wps = 98263, train loss = 4.5514 Iteration 3045, time = 1.55s, wps = 99182, train loss = 4.5543 Iteration 3065, time = 1.57s, wps = 98140, train loss = 4.5863 Iteration 3085, time = 1.57s, wps = 98114, train loss = 4.6070 Iteration 3105, time = 1.56s, wps = 98498, train loss = 4.6758 Iteration 3125, time = 
1.56s, wps = 98217, train loss = 4.5680 Iteration 3145, time = 1.57s, wps = 98127, train loss = 4.5243 Iteration 3165, time = 1.57s, wps = 97746, train loss = 4.5726 Iteration 3185, time = 1.56s, wps = 98558, train loss = 4.5437 Iteration 3205, time = 1.57s, wps = 98098, train loss = 4.5981 Iteration 3225, time = 1.56s, wps = 98242, train loss = 4.5723 Iteration 3245, time = 1.56s, wps = 98376, train loss = 4.5778 Iteration 3265, time = 1.56s, wps = 98694, train loss = 4.5443 Iteration 3285, time = 1.56s, wps = 98758, train loss = 4.5539 Iteration 3305, time = 1.56s, wps = 98556, train loss = 4.5503 Iteration 3325, time = 1.56s, wps = 98165, train loss = 4.5513 Iteration 3345, time = 1.55s, wps = 98847, train loss = 4.5106 Iteration 3365, time = 1.56s, wps = 98331, train loss = 4.4815 Iteration 3385, time = 1.56s, wps = 98385, train loss = 4.6506 Iteration 3405, time = 1.57s, wps = 98076, train loss = 4.6141 Iteration 3425, time = 1.56s, wps = 98170, train loss = 4.5841 Iteration 3445, time = 1.57s, wps = 97898, train loss = 4.6132 Iteration 3465, time = 1.57s, wps = 97922, train loss = 4.5109 Iteration 3485, time = 1.57s, wps = 97855, train loss = 4.5652
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real	3m14.326s
user	19m16.343s
sys	2m15.496s
root@3a240370f11d:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output
WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
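Comparing runs by eye across these long log lines is error-prone; the (iteration, time, wps, loss) tuples can instead be pulled out mechanically. A minimal parser sketch (the sample line is copied from the 3-GPU log above):

```python
import re

# Matches lines like:
# "Iteration 3485, time = 1.57s, wps = 97855, train loss = 4.5652"
ITER_RE = re.compile(
    r"Iteration (\d+), time = ([\d.]+)s, wps = (\d+), train loss = ([\d.]+)")

def parse_iteration(line):
    """Return (step, seconds, wps, loss) for an iteration line, else None."""
    m = ITER_RE.search(line)
    if m is None:
        return None  # timestamps, warnings, "Processing file:" lines, etc.
    step, secs, wps, loss = m.groups()
    return int(step), float(secs), int(wps), float(loss)

sample = "Iteration 3485, time = 1.57s, wps = 97855, train loss = 4.5652"
print(parse_iteration(sample))  # (3485, 1.57, 97855, 4.5652)
```

Feeding each captured log through this parser gives clean per-run series that can be averaged or plotted to compare the 4-, 3-, and 2-GPU configurations.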
*****HYPER PARAMETERS***** {'vocab_size': 793470, 'batch_size': 128, 'num_shards': 8, 'keep_prob': 0.9, 'emb_size': 512, 'num_steps': 20, 'num_layers': 1, 'run_profiler': False, 'projected_size': 512, 'num_sampled': 8192, 'state_size': 2048, 'num_delayed_steps': 150, 'average_params': True, 'num_gpus': 2, 'max_grad_norm': 10.0, 'do_summaries': False, 'max_time': 180, 'learning_rate': 0.2, 'optimizer': 0} ************************** WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version. Instructions for updating: Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead. 
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1591043546.0984664
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-06-01 20:32:26.496625: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3700135000 Hz
2020-06-01 20:32:26.500350: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x9b4bf20 executing computations on platform Host. Devices:
2020-06-01 20:32:26.500390: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): ,
2020-06-01 20:32:26.950737: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-01 20:32:26.955560: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-01 20:32:26.962692: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-01 20:32:26.963602: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x993a390 executing computations on platform CUDA. Devices:
2020-06-01 20:32:26.963644: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-06-01 20:32:26.963650: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-06-01 20:32:26.963658: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-01 20:32:26.963665: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-01 20:32:26.964744: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:01:00.0
totalMemory: 23.65GiB freeMemory: 23.22GiB
2020-06-01 20:32:26.964772: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.48GiB
2020-06-01 20:32:26.964795: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4a:00.0
totalMemory: 10.76GiB freeMemory: 10.60GiB
2020-06-01 20:32:26.964818: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4b:00.0
totalMemory: 10.76GiB freeMemory: 10.60GiB
2020-06-01 20:32:26.964956: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-06-01 20:32:27.738024: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-06-01 20:32:27.738062: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0 1 2 3
2020-06-01 20:32:27.738067: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N N N N
2020-06-01 20:32:27.738070: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1: N N N N
2020-06-01 20:32:27.738078: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2: N N N N
2020-06-01 20:32:27.738083: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3: N N N N
2020-06-01 20:32:27.738218: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22500 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-06-01 20:32:27.738573: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22757 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-06-01 20:32:27.738925: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10224 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4a:00.0, compute capability: 7.5)
2020-06-01 20:32:27.739212: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10224 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00011-of-00100
Finished processing!
2020-06-01 20:32:37.989914: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 3494, time = 6.09s, wps = 841, train loss = 4.8763
Iteration 3495, time = 3.49s, wps = 1467, train loss = 4.6180
Iteration 3496, time = 0.07s, wps = 76141, train loss = 4.5492
Iteration 3497, time = 0.07s, wps = 77711, train loss = 4.5084
Iteration 3498, time = 0.07s, wps = 75371, train loss = 4.4828
Iteration 3499, time = 0.06s, wps = 87037, train loss = 4.4547
Iteration 3500, time = 0.06s, wps = 84301, train loss = 4.5413
Iteration 3501, time = 0.06s, wps = 87051, train loss = 4.5439
Iteration 3502, time = 0.06s, wps = 86380, train loss = 4.5578
Iteration 3513, time = 0.67s, wps = 84578, train loss = 4.5844
Iteration 3533, time = 1.19s, wps = 86270, train loss = 4.5223
Iteration 3553, time = 1.18s, wps = 86470, train loss = 4.5708
Iteration 3573, time = 1.18s, wps = 86433, train loss = 4.5274
Iteration 3593, time = 1.19s, wps = 85833, train loss = 4.5301
Iteration 3613, time = 1.21s, wps = 84531, train loss = 4.5983
Iteration 3633, time = 1.20s, wps = 85605, train loss = 4.4184
Iteration 3653, time = 1.20s, wps = 85467, train loss = 4.4825
Iteration 3673, time = 1.20s, wps = 85245, train loss = 4.5229
Iteration 3693, time = 1.19s, wps = 85941, train loss = 4.4787
Iteration 3713, time = 1.21s, wps = 84585, train loss = 4.4681
Iteration 3733, time = 1.19s, wps = 85927, train loss = 4.4709
Iteration 3753, time = 1.19s, wps = 85707, train loss = 4.4867
Iteration 3773, time = 1.19s, wps = 85803, train loss = 4.5386
Iteration 3793, time = 1.19s, wps = 85770, train loss = 4.4912
Iteration 3813, time = 1.20s, wps = 85590, train loss = 4.4897
Iteration 3833, time = 1.19s, wps = 85700, train loss = 4.4913
Iteration 3853, time = 1.20s, wps = 85276, train loss = 4.4974
Iteration 3873, time = 1.20s, wps = 85265, train loss = 4.4296
Iteration 3893, time = 1.19s, wps = 86110, train loss = 4.4093
Iteration 3913, time = 1.20s, wps = 85474, train loss = 4.4766
Iteration 3933, time = 1.19s, wps = 86200, train loss = 4.5235
Iteration 3953, time = 1.20s, wps = 85576, train loss = 4.4369
Iteration 3973, time = 1.19s, wps = 86169, train loss = 4.4775
Iteration 3993, time = 1.19s, wps = 85855, train loss = 4.4662
Iteration 4013, time = 1.19s, wps = 86190, train loss = 4.4700
Iteration 4033, time = 1.19s, wps = 85769, train loss = 4.4297
Iteration 4053, time = 1.19s, wps = 86103, train loss = 4.4921
Iteration 4073, time = 1.19s, wps = 85813, train loss = 4.5815
Iteration 4093, time = 1.18s, wps = 86521, train loss = 4.5540
Iteration 4113, time = 1.19s, wps = 86234, train loss = 4.5792
Iteration 4133, time = 1.19s, wps = 85937, train loss = 4.4461
Iteration 4153, time = 1.20s, wps = 85609, train loss = 4.4309
Iteration 4173, time = 1.19s, wps = 85960, train loss = 4.5012
Iteration 4193, time = 1.19s, wps = 85891, train loss = 4.4490
Iteration 4213, time = 1.20s, wps = 85122, train loss = 4.4630
Iteration 4233, time = 1.19s, wps = 86269, train loss = 4.4908
Iteration 4253, time = 1.19s, wps = 86223, train loss = 4.4835
Iteration 4273, time = 1.19s, wps = 85721, train loss = 4.4538
Iteration 4293, time = 1.20s, wps = 85189, train loss = 4.4163
Iteration 4313, time = 1.20s, wps = 85519, train loss = 4.3972
Iteration 4333, time = 1.20s, wps = 85309, train loss = 4.4831
Iteration 4353, time = 1.20s, wps = 85170, train loss = 4.4980
Iteration 4373, time = 1.20s, wps = 85071, train loss = 4.4818
Iteration 4393, time = 1.19s, wps = 85821, train loss = 4.4659
Iteration 4413, time = 1.21s, wps = 84850, train loss = 4.5126
Iteration 4433, time = 1.19s, wps = 85892, train loss = 4.4872
Iteration 4453, time = 1.20s, wps = 85673, train loss = 4.3884
Iteration 4473, time = 1.19s, wps = 86031, train loss = 4.4637
Iteration 4493, time = 1.20s, wps = 85326, train loss = 4.4990
Iteration 4513, time = 1.20s, wps = 85219, train loss = 4.5076
Iteration 4533, time = 1.20s, wps = 85234, train loss = 4.4179
Iteration 4553, time = 1.21s, wps = 84733, train loss = 4.3759
Iteration 4573, time = 1.20s, wps = 85536, train loss = 4.3635
Iteration 4593, time = 1.19s, wps = 85717, train loss = 4.4320
Iteration 4613, time = 1.20s, wps = 85579, train loss = 4.4330
Iteration 4633, time = 1.20s, wps = 85521, train loss = 4.4247
Iteration 4653, time = 1.19s, wps = 85888, train loss = 4.3932
Iteration 4673, time = 1.19s, wps = 85938, train loss = 4.4002
Iteration 4693, time = 1.19s, wps = 85860, train loss = 4.5072
Iteration 4713, time = 1.20s, wps = 85375, train loss = 4.4632
Iteration 4733, time = 1.20s, wps = 85479, train loss = 4.4350
Iteration 4753, time = 1.19s, wps = 86117, train loss = 4.4137
Iteration 4773, time = 1.20s, wps = 85512, train loss = 4.3311
Iteration 4793, time = 1.20s, wps = 85524, train loss = 4.3432
Iteration 4813, time = 1.20s, wps = 85529, train loss = 4.4212
Iteration 4833, time = 1.20s, wps = 85185, train loss = 4.4165
Iteration 4853, time = 1.20s, wps = 85258, train loss = 4.3791
Iteration 4873, time = 1.19s, wps = 85958, train loss = 4.3577
Iteration 4893, time = 1.21s, wps = 84918, train loss = 4.4400
Iteration 4913, time = 1.20s, wps = 85429, train loss = 4.3171
Iteration 4933, time = 1.21s, wps = 84896, train loss = 4.3234
Iteration 4953, time = 1.20s, wps = 85411, train loss = 4.5441
Iteration 4973, time = 1.20s, wps = 85491, train loss = 4.5109
Iteration 4993, time = 1.20s, wps = 85654, train loss = 4.4492
Iteration 5013, time = 1.21s, wps = 84550, train loss = 4.2957
Iteration 5033, time = 1.20s, wps = 85522, train loss = 4.4032
Iteration 5053, time = 1.20s, wps = 85555, train loss = 4.3830
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00021-of-00100
Finished processing!
Iteration 5073, time = 2.78s, wps = 36824, train loss = 4.4500
Iteration 5093, time = 1.20s, wps = 85302, train loss = 4.4508
Iteration 5113, time = 1.20s, wps = 85501, train loss = 4.3677
Iteration 5133, time = 1.20s, wps = 85008, train loss = 4.4410
Iteration 5153, time = 1.20s, wps = 85677, train loss = 4.4271
Iteration 5173, time = 1.20s, wps = 85429, train loss = 4.3795
Iteration 5193, time = 1.19s, wps = 85754, train loss = 4.4429
Iteration 5213, time = 1.21s, wps = 84812, train loss = 4.4057
Iteration 5233, time = 1.20s, wps = 85537, train loss = 4.3585
Iteration 5253, time = 1.20s, wps = 85339, train loss = 4.4529
Iteration 5273, time = 1.21s, wps = 84951, train loss = 4.4069
Iteration 5293, time = 1.20s, wps = 85190, train loss = 4.4393
Iteration 5313, time = 1.20s, wps = 85155, train loss = 4.3538
Iteration 5333, time = 1.19s, wps = 85885, train loss = 4.2833
Iteration 5353, time = 1.20s, wps = 85434, train loss = 4.3452
Iteration 5373, time = 1.20s, wps = 85588, train loss = 4.3130
Iteration 5393, time = 1.20s, wps = 85230, train loss = 4.3516
Iteration 5413, time = 1.20s, wps = 85179, train loss = 4.3395
Iteration 5433, time = 1.19s, wps = 85773, train loss = 4.3718
Iteration 5453, time = 1.20s, wps = 85450, train loss = 4.3622
Iteration 5473, time = 1.21s, wps = 84803, train loss = 4.3529
Iteration 5493, time = 1.20s, wps = 85505, train loss = 4.4329
Iteration 5513, time = 1.20s, wps = 85091, train loss = 4.3551
Iteration 5533, time = 1.20s, wps = 85675, train loss = 4.3686
Iteration 5553, time = 1.20s, wps = 85118, train loss = 4.4024
Iteration 5573, time = 1.20s, wps = 85129, train loss = 4.3636
Iteration 5593, time = 1.20s, wps = 85520, train loss = 4.4449
Iteration 5613, time = 1.21s, wps = 84851, train loss = 4.4038
Iteration 5633, time = 1.20s, wps = 85158, train loss = 4.3988
Iteration 5653, time = 1.20s, wps = 85168, train loss = 4.4314
Iteration 5673, time = 1.20s, wps = 85426, train loss = 4.3232
Iteration 5693, time = 1.20s, wps = 85180, train loss = 4.4340
Iteration 5713, time = 1.21s, wps = 84610, train loss = 4.2733
Iteration 5733, time = 1.22s, wps = 83793, train loss = 4.3978
Iteration 5753, time = 1.20s, wps = 85628, train loss = 4.3536
Iteration 5773, time = 1.20s, wps = 85346, train loss = 4.2613
Iteration 5793, time = 1.20s, wps = 85679, train loss = 4.2861
Iteration 5813, time = 1.22s, wps = 84276, train loss = 4.3616
Iteration 5833, time = 1.21s, wps = 84798, train loss = 4.3236
Iteration 5853, time = 1.20s, wps = 85306, train loss = 4.2882
Iteration 5873, time = 1.21s, wps = 84907, train loss = 4.3955
Iteration 5893, time = 1.20s, wps = 85682, train loss = 4.2749
Iteration 5913, time = 1.20s, wps = 85152, train loss = 4.4346
Iteration 5933, time = 1.21s, wps = 84926, train loss = 4.3523
Iteration 5953, time = 1.20s, wps = 85057, train loss = 4.3074
Iteration 5973, time = 1.21s, wps = 84774, train loss = 4.3174
Iteration 5993, time = 1.21s, wps = 84290, train loss = 4.1985
Iteration 6013, time = 1.21s, wps = 84894, train loss = 4.4451
Iteration 6033, time = 1.20s, wps = 85479, train loss = 4.3136
Iteration 6053, time = 1.21s, wps = 84705, train loss = 4.3561
Iteration 6073, time = 1.21s, wps = 84966, train loss = 4.3888
Iteration 6093, time = 1.21s, wps = 84889, train loss = 4.3832
Iteration 6113, time = 1.20s, wps = 85110, train loss = 4.3940
Iteration 6133, time = 1.21s, wps = 84966, train loss = 4.3767
Iteration 6153, time = 1.21s, wps = 84955, train loss = 4.2223
Iteration 6173, time = 1.21s, wps = 84707, train loss = 4.3892
Iteration 6193, time = 1.21s, wps = 84584, train loss = 4.3384
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter.
The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m11.947s
user    14m40.786s
sys     1m43.165s
root@3a240370f11d:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=1 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output
WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
*****HYPER PARAMETERS*****
{'max_grad_norm': 10.0, 'vocab_size': 793470, 'num_delayed_steps': 150, 'projected_size': 512, 'emb_size': 512, 'num_steps': 20, 'num_gpus': 1, 'num_sampled': 8192, 'state_size': 2048, 'learning_rate': 0.2, 'num_shards': 8, 'batch_size': 128, 'optimizer': 0, 'num_layers': 1, 'max_time': 180, 'do_summaries': False, 'average_params': True, 'run_profiler': False, 'keep_prob': 0.9}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1591044710.8674254
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
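The dropout deprecation warning repeated in this run notes that `keep_prob` is being replaced by `rate = 1 - keep_prob`. A minimal illustration of that relationship in plain Python (not TensorFlow; the `dropout` helper here is my own sketch of inverted dropout, using the run's `keep_prob = 0.9`, i.e. `rate = 0.1`):

```python
import random

def dropout(values, rate, seed=None):
    """Inverted dropout: zero each value with probability `rate`
    (= 1 - keep_prob) and scale survivors by 1 / (1 - rate)."""
    rng = random.Random(seed)
    keep_prob = 1.0 - rate
    return [v / keep_prob if rng.random() >= rate else 0.0 for v in values]

# keep_prob = 0.9 in the hyperparameters above, so rate = 0.1:
out = dropout([1.0] * 10000, rate=0.1, seed=0)
kept = sum(1 for v in out if v != 0.0)  # roughly 90% of units survive
```

The scaling by `1 / keep_prob` is what keeps the expected activation unchanged between training and inference.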
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2020-06-01 20:51:51.066629: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3700135000 Hz
2020-06-01 20:51:51.070481: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x6b669a0 executing computations on platform Host.
Devices:
2020-06-01 20:51:51.070528: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): ,
2020-06-01 20:51:51.519140: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-01 20:51:51.557621: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-01 20:51:51.564657: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-06-01 20:51:51.565535: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x6b42580 executing computations on platform CUDA. Devices:
2020-06-01 20:51:51.565577: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): TITAN RTX, Compute Capability 7.5
2020-06-01 20:51:51.565583: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (1): TITAN RTX, Compute Capability 7.5
2020-06-01 20:51:51.565588: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (2): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-01 20:51:51.565594: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (3): GeForce RTX 2080 Ti, Compute Capability 7.5
2020-06-01 20:51:51.566704: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:01:00.0
totalMemory: 23.65GiB freeMemory: 23.22GiB
2020-06-01 20:51:51.566733: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: TITAN RTX major: 7 minor: 5 memoryClockRate(GHz): 1.77
pciBusID: 0000:21:00.0
totalMemory: 23.65GiB freeMemory: 23.48GiB
2020-06-01 20:51:51.566755: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 2 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4a:00.0
totalMemory: 10.76GiB freeMemory: 10.60GiB
2020-06-01 20:51:51.566777: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 3 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:4b:00.0
totalMemory: 10.76GiB freeMemory: 10.60GiB
2020-06-01 20:51:51.566913: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1, 2, 3
2020-06-01 20:51:52.330532: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-06-01 20:51:52.330570: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0 1 2 3
2020-06-01 20:51:52.330575: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N N N N
2020-06-01 20:51:52.330579: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1: N N N N
2020-06-01 20:51:52.330587: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 2: N N N N
2020-06-01 20:51:52.330591: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 3: N N N N
2020-06-01 20:51:52.330727: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22500 MB memory) -> physical GPU (device: 0, name: TITAN RTX, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-06-01 20:51:52.330972: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 22757 MB memory) -> physical GPU (device: 1, name: TITAN RTX, pci bus id: 0000:21:00.0, compute capability: 7.5)
2020-06-01 20:51:52.331248: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10224 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:4a:00.0, compute capability: 7.5)
2020-06-01 20:51:52.331526: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10224 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:4b:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00051-of-00100
Finished processing!
2020-06-01 20:51:59.745407: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 6208, time = 3.85s, wps = 665, train loss = 4.6341
Iteration 6209, time = 1.80s, wps = 1423, train loss = 4.3980
Iteration 6210, time = 0.06s, wps = 45603, train loss = 4.3896
Iteration 6211, time = 0.05s, wps = 48117, train loss = 4.3405
Iteration 6212, time = 0.06s, wps = 45418, train loss = 4.2486
Iteration 6213, time = 0.05s, wps = 48315, train loss = 4.3735
Iteration 6214, time = 0.04s, wps = 58407, train loss = 4.3213
Iteration 6215, time = 0.05s, wps = 54050, train loss = 4.3433
Iteration 6216, time = 0.05s, wps = 53446, train loss = 4.2175
Iteration 6227, time = 0.51s, wps = 54693, train loss = 4.3742
Iteration 6247, time = 0.96s, wps = 53493, train loss = 4.4245
Iteration 6267, time = 0.94s, wps = 54346, train loss = 4.2588
Iteration 6287, time = 0.94s, wps = 54411, train loss = 4.4472
Iteration 6307, time = 0.94s, wps = 54519, train loss = 4.3803
Iteration 6327, time = 0.95s, wps = 54094, train loss = 4.3794
Iteration 6347, time = 0.95s, wps = 53858, train loss = 4.3120
Iteration 6367, time = 0.94s, wps = 54286, train loss = 4.2293
Iteration 6387, time = 0.94s, wps = 54524, train loss = 4.4599
Iteration 6407, time = 0.95s, wps = 54162, train loss = 4.2968
Iteration 6427, time = 0.95s, wps = 53989, train loss = 4.2345
Iteration 6447, time = 0.96s, wps = 53503, train loss = 4.3044
Iteration 6467, time = 0.95s, wps = 53966, train loss = 4.3254
Iteration 6487, time = 0.94s, wps = 54279, train loss = 4.4034
Iteration 6507, time = 0.94s, wps = 54370, train loss = 4.4364
Iteration 6527, time = 0.95s, wps = 53843, train loss = 4.4111
Iteration 6547, time = 0.95s, wps = 54173, train loss = 4.3590
Iteration 6567, time = 0.95s, wps = 53795, train loss = 4.3986
Iteration 6587, time = 0.94s, wps = 54403, train loss = 4.3408
Iteration 6607, time = 0.95s, wps = 53837, train loss = 4.3331
Iteration 6627, time = 0.94s, wps = 54502, train loss = 4.2482
Iteration 6647, time = 0.94s, wps = 54284, train loss = 4.4300
Iteration 6667, time = 0.95s, wps = 53938, train loss = 4.3301
Iteration 6687, time = 0.94s, wps = 54350, train loss = 4.3286
Iteration 6707, time = 0.95s, wps = 54099, train loss = 4.3738
Iteration 6727, time = 0.94s, wps = 54338, train loss = 4.2359
Iteration 6747, time = 0.96s, wps = 53289, train loss = 4.3580
Iteration 6767, time = 0.96s, wps = 53090, train loss = 4.4069
Iteration 6787, time = 0.97s, wps = 52861, train loss = 4.2299
Iteration 6807, time = 0.95s, wps = 53746, train loss = 4.3145
Iteration 6827, time = 0.95s, wps = 54110, train loss = 4.3505
Iteration 6847, time = 0.96s, wps = 53137, train loss = 4.3104
Iteration 6867, time = 0.95s, wps = 53807, train loss = 4.3842
Iteration 6887, time = 0.94s, wps = 54321, train loss = 4.4225
Iteration 6907, time = 0.95s, wps = 53892, train loss = 4.2659
Iteration 6927, time = 0.94s, wps = 54279, train loss = 4.4397
Iteration 6947, time = 0.95s, wps = 54024, train loss = 4.3171
Iteration 6967, time = 0.94s, wps = 54289, train loss = 4.3548
Iteration 6987, time = 0.94s, wps = 54204, train loss = 4.4337
Iteration 7007, time = 0.94s, wps = 54242, train loss = 4.4383
Iteration 7027, time = 0.95s, wps = 53871, train loss = 4.2482
Iteration 7047, time = 0.97s, wps = 52699, train loss = 4.2971
Iteration 7067, time = 0.95s, wps = 54008, train loss = 4.2008
Iteration 7087, time = 0.95s, wps = 54046, train loss = 4.2745
Iteration 7107, time = 0.94s, wps = 54487, train loss = 4.2914
Iteration 7127, time = 0.94s, wps = 54321, train loss = 4.2341
Iteration 7147, time = 0.95s, wps = 53618, train loss = 4.2344
Iteration 7167, time = 0.95s, wps = 54146, train loss = 4.3611
Iteration 7187, time = 0.95s, wps = 54100, train loss = 4.2731
Iteration 7207, time = 0.95s, wps = 54040, train loss = 4.3233
Iteration 7227, time = 0.95s, wps = 53826, train loss = 4.3591
Iteration 7247, time = 0.96s, wps = 53225, train loss = 4.2984
Iteration 7267, time = 0.96s, wps = 53382, train loss = 4.3882
Iteration 7287, time = 0.96s, wps = 53476, train loss = 4.4744
Iteration 7307, time = 0.94s, wps = 54318, train loss = 4.3279
Iteration 7327, time = 0.95s, wps = 53946, train loss = 4.4096
Iteration 7347, time = 0.95s, wps = 53726, train loss = 4.2428
Iteration 7367, time = 0.94s, wps = 54356, train loss = 4.3980
Iteration 7387, time = 0.96s, wps = 53358, train loss = 4.2516
Iteration 7407, time = 0.95s, wps = 53718, train loss = 4.3782
Iteration 7427, time = 0.95s, wps = 53858, train loss = 4.2871
Iteration 7447, time = 0.95s, wps = 53939, train loss = 4.3003
Iteration 7467, time = 0.95s, wps = 53800, train loss = 4.3252
Iteration 7487, time = 0.95s, wps = 54127, train loss = 4.3932
Iteration 7507, time = 0.95s, wps = 53657, train loss = 4.4103
Iteration 7527, time = 0.94s, wps = 54217, train loss = 4.3188
Iteration 7547, time = 0.95s, wps = 53642, train loss = 4.2674
Iteration 7567, time = 0.96s, wps = 53501, train loss = 4.3843
Iteration 7587, time = 0.95s, wps = 53670, train loss = 4.3364
Iteration 7607, time = 0.95s, wps = 53887, train loss = 4.2428
Iteration 7627, time = 0.95s, wps = 53952, train loss = 4.3425
Iteration 7647, time = 0.96s, wps = 53170, train loss = 4.2231
Iteration 7667, time = 0.96s, wps = 53597, train loss = 4.3309
Iteration 7687, time = 0.95s, wps = 53944, train loss = 4.3012
Iteration 7707, time = 0.96s, wps = 53374, train loss = 4.4474
Iteration 7727, time = 0.95s, wps = 53853, train loss = 4.3827
Iteration 7747, time = 0.96s, wps = 53359, train loss = 4.3415
Iteration 7767, time = 0.95s, wps = 53877, train loss = 4.2797
Iteration 7787, time = 0.95s, wps = 53786, train loss = 4.2725
Iteration 7807, time = 0.96s, wps = 53467, train loss = 4.4439
Iteration 7827, time = 0.96s, wps = 53133, train loss = 4.3320
Iteration 7847, time = 0.96s, wps = 53282, train loss = 4.2940
Iteration 7867, time = 0.97s, wps = 52895, train loss = 4.4132
Iteration 7887, time = 0.96s, wps = 53374, train loss = 4.2965
Iteration 7907, time = 0.96s, wps = 53324, train loss = 4.3241
Iteration 7927, time = 0.96s, wps = 53522, train loss = 4.2624
Iteration 7947, time = 0.96s, wps = 53581, train loss = 4.3365
Iteration 7967, time = 0.96s, wps = 53239, train loss = 4.2602
Iteration 7987, time = 0.95s, wps = 54047, train loss = 4.3147
Iteration 8007, time = 0.95s, wps = 53631, train loss = 4.4166
Iteration 8027, time = 0.96s, wps = 53348, train loss = 4.3481
Iteration 8047, time = 0.98s, wps = 52129, train loss = 4.3191
Iteration 8067, time = 0.96s, wps = 53110, train loss = 4.4193
Iteration 8087, time = 0.95s, wps = 53665, train loss = 4.2284
Iteration 8107, time = 0.96s, wps = 53228, train loss = 4.2401
Iteration 8127, time = 0.96s, wps = 53135, train loss = 4.2256
Iteration 8147, time = 0.95s, wps = 53958, train loss = 4.3009
Iteration 8167, time = 0.96s, wps = 53528, train loss = 4.3702
Iteration 8187, time = 0.96s, wps = 53355, train loss = 4.2690
Iteration 8207, time = 0.95s, wps = 54140, train loss = 4.2740
Iteration 8227, time = 0.96s, wps = 53589, train loss = 4.2372
Iteration 8247, time = 0.95s, wps = 53623, train loss = 4.4632
Iteration 8267, time = 0.96s, wps = 53456, train loss = 4.1985
Iteration 8287, time = 0.97s, wps = 53031, train loss = 4.3412
Iteration 8307, time = 0.97s, wps = 53020, train loss = 4.2668
Iteration 8327, time = 0.96s, wps = 53376, train loss = 4.3048
Iteration 8347, time = 0.95s, wps = 53705, train loss = 4.2933
Iteration 8367, time = 0.95s, wps = 53976, train loss = 4.2805
Iteration 8387, time = 0.95s, wps = 53912, train loss = 4.3370
Iteration 8407, time = 0.96s, wps = 53513, train loss = 4.3675
Iteration 8427, time = 0.96s, wps = 53408, train loss = 4.3837
Iteration 8447, time = 0.96s, wps = 53446, train loss = 4.4697
Iteration 8467, time = 0.95s, wps = 53628, train loss = 4.3325
Iteration 8487, time = 0.96s, wps = 53396, train loss = 4.3039
Iteration 8507, time = 0.96s, wps = 53210, train loss = 4.3607
Iteration 8527, time = 0.96s, wps = 53493,
train loss = 4.2610 Iteration 8547, time = 0.97s, wps = 52552, train loss = 4.3606 Iteration 8567, time = 0.95s, wps = 53630, train loss = 4.3509 Iteration 8587, time = 0.98s, wps = 52110, train loss = 4.2116 Iteration 8607, time = 0.96s, wps = 53608, train loss = 4.3673 Iteration 8627, time = 0.96s, wps = 53224, train loss = 4.2168 Iteration 8647, time = 0.96s, wps = 53222, train loss = 4.4127 Iteration 8667, time = 0.97s, wps = 52826, train loss = 4.3253 Iteration 8687, time = 0.98s, wps = 52422, train loss = 4.2069 Iteration 8707, time = 0.96s, wps = 53184, train loss = 4.2843 Iteration 8727, time = 0.96s, wps = 53439, train loss = 4.1462 Iteration 8747, time = 0.95s, wps = 53962, train loss = 4.1900 Iteration 8767, time = 0.95s, wps = 53648, train loss = 4.3115 Iteration 8787, time = 0.97s, wps = 52634, train loss = 4.3254 Iteration 8807, time = 0.98s, wps = 52507, train loss = 4.2801 Iteration 8827, time = 0.95s, wps = 53922, train loss = 4.2632 Iteration 8847, time = 0.97s, wps = 52561, train loss = 4.3774 Iteration 8867, time = 0.95s, wps = 54013, train loss = 4.2678 Iteration 8887, time = 0.97s, wps = 52836, train loss = 4.2721 Iteration 8907, time = 0.96s, wps = 53296, train loss = 4.1908 Iteration 8927, time = 0.96s, wps = 53068, train loss = 4.4227 Iteration 8947, time = 0.96s, wps = 53195, train loss = 4.3236 Iteration 8967, time = 0.97s, wps = 52592, train loss = 4.1919 Iteration 8987, time = 0.98s, wps = 52427, train loss = 4.2991 Iteration 9007, time = 0.98s, wps = 52177, train loss = 4.2672 Iteration 9027, time = 0.96s, wps = 53378, train loss = 4.1333 Iteration 9047, time = 0.98s, wps = 52348, train loss = 4.3370 Iteration 9067, time = 0.96s, wps = 53189, train loss = 4.2492 Iteration 9087, time = 0.96s, wps = 53376, train loss = 4.2925 Iteration 9107, time = 0.97s, wps = 52642, train loss = 4.2439 Iteration 9127, time = 0.96s, wps = 53411, train loss = 4.1978 Iteration 9147, time = 0.97s, wps = 52991, train loss = 4.1665 Iteration 9167, time = 
0.97s, wps = 52863, train loss = 4.2880 Iteration 9187, time = 0.98s, wps = 52350, train loss = 4.3231 Iteration 9207, time = 0.99s, wps = 51834, train loss = 4.2113 Iteration 9227, time = 0.95s, wps = 53851, train loss = 4.1763 Iteration 9247, time = 0.98s, wps = 52363, train loss = 4.2041 Iteration 9267, time = 0.97s, wps = 52623, train loss = 4.1740 Iteration 9287, time = 0.97s, wps = 52553, train loss = 4.1767 Iteration 9307, time = 0.98s, wps = 51983, train loss = 4.3073 Iteration 9327, time = 0.97s, wps = 52815, train loss = 4.3052 Iteration 9347, time = 0.98s, wps = 52488, train loss = 4.2142 Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00019-of-00100 Finished processing! Iteration 9367, time = 2.54s, wps = 20184, train loss = 4.1994 Iteration 9387, time = 0.96s, wps = 53343, train loss = 4.2714 Iteration 9407, time = 0.98s, wps = 52215, train loss = 4.3703 Iteration 9427, time = 0.99s, wps = 51872, train loss = 4.1935 Iteration 9447, time = 0.97s, wps = 52615, train loss = 4.4510 Iteration 9467, time = 0.98s, wps = 52499, train loss = 4.1877 Iteration 9487, time = 0.98s, wps = 52396, train loss = 4.3557 Iteration 9507, time = 0.97s, wps = 52713, train loss = 4.3085 Iteration 9527, time = 0.97s, wps = 52873, train loss = 4.1230 Iteration 9547, time = 0.98s, wps = 52397, train loss = 4.2766 Iteration 9567, time = 0.99s, wps = 51486, train loss = 4.2206 Iteration 9587, time = 0.97s, wps = 52675, train loss = 4.3463 Iteration 9607, time = 0.98s, wps = 52074, train loss = 4.4602 Iteration 9627, time = 0.99s, wps = 51873, train loss = 4.2355 Iteration 9647, time = 0.99s, wps = 51926, train loss = 4.2779 Iteration 9667, time = 0.99s, wps = 51918, train loss = 4.3491 Iteration 9687, time = 0.98s, wps = 52299, train loss = 4.2330 /usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. 
The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real	3m10.133s
user	8m7.645s
sys	1m8.496s
root@3a240370f11d:/workspace/nvidia-examples/big_lstm# cat /etc/os-release
NAME="Ubuntu"
VERSION="16.04.6 LTS (Xenial Xerus)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 16.04.6 LTS"
VERSION_ID="16.04"
HOME_URL="http://www.ubuntu.com/"
SUPPORT_URL="http://help.ubuntu.com/"
BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/"
VERSION_CODENAME=xenial
UBUNTU_CODENAME=xenial
root@3a240370f11d:/workspace/nvidia-examples/big_lstm# nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Fri_Feb__8_19:08:17_PST_2019
Cuda compilation tools, release 10.1, V10.1.105
root@3a240370f11d:/workspace/nvidia-examples/big_lstm# cd data
root@3a240370f11d:/workspace/nvidia-examples/big_lstm/data# ls
1-billion-word-language-modeling-benchmark-r13output
root@3a240370f11d:/workspace/nvidia-examples/big_lstm/data# cd 1-billion-word-language-modeling-benchmark-r13output
root@3a240370f11d:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# ls
1b_word_vocab.txt  heldout-monolingual.tokenized.shuffled  README  training-monolingual.tokenized.shuffled
root@3a240370f11d:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# cd training-monolingual.tokenized.shuffled
root@3a240370f11d:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# ls
news.en-00001-of-00100  news.en-00034-of-00100  news.en-00067-of-00100
news.en-00002-of-00100  news.en-00035-of-00100  news.en-00068-of-00100
news.en-00003-of-00100  news.en-00036-of-00100  news.en-00069-of-00100
news.en-00004-of-00100  news.en-00037-of-00100  news.en-00070-of-00100
news.en-00005-of-00100  news.en-00038-of-00100  news.en-00071-of-00100
news.en-00006-of-00100  news.en-00039-of-00100  news.en-00072-of-00100
news.en-00007-of-00100  news.en-00040-of-00100  news.en-00073-of-00100
news.en-00008-of-00100  news.en-00041-of-00100  news.en-00074-of-00100
news.en-00009-of-00100  news.en-00042-of-00100  news.en-00075-of-00100
news.en-00010-of-00100  news.en-00043-of-00100  news.en-00076-of-00100
news.en-00011-of-00100  news.en-00044-of-00100  news.en-00077-of-00100
news.en-00012-of-00100  news.en-00045-of-00100  news.en-00078-of-00100
news.en-00013-of-00100  news.en-00046-of-00100  news.en-00079-of-00100
news.en-00014-of-00100  news.en-00047-of-00100  news.en-00080-of-00100
news.en-00015-of-00100  news.en-00048-of-00100  news.en-00081-of-00100
news.en-00016-of-00100  news.en-00049-of-00100  news.en-00082-of-00100
news.en-00017-of-00100  news.en-00050-of-00100  news.en-00083-of-00100
news.en-00018-of-00100  news.en-00051-of-00100  news.en-00084-of-00100
news.en-00019-of-00100  news.en-00052-of-00100  news.en-00085-of-00100
news.en-00020-of-00100  news.en-00053-of-00100  news.en-00086-of-00100
news.en-00021-of-00100  news.en-00054-of-00100  news.en-00087-of-00100
news.en-00022-of-00100  news.en-00055-of-00100  news.en-00088-of-00100
news.en-00023-of-00100  news.en-00056-of-00100  news.en-00089-of-00100
news.en-00024-of-00100  news.en-00057-of-00100  news.en-00090-of-00100
news.en-00025-of-00100  news.en-00058-of-00100  news.en-00091-of-00100
news.en-00026-of-00100  news.en-00059-of-00100  news.en-00092-of-00100
news.en-00027-of-00100  news.en-00060-of-00100  news.en-00093-of-00100
news.en-00028-of-00100  news.en-00061-of-00100  news.en-00094-of-00100
news.en-00029-of-00100  news.en-00062-of-00100  news.en-00095-of-00100
news.en-00030-of-00100  news.en-00063-of-00100  news.en-00096-of-00100
news.en-00031-of-00100  news.en-00064-of-00100  news.en-00097-of-00100
news.en-00032-of-00100  news.en-00065-of-00100  news.en-00098-of-00100
news.en-00033-of-00100  news.en-00066-of-00100  news.en-00099-of-00100
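For quick comparisons between runs, the per-iteration lines above can be summarized with a short parser. This is a sketch (not part of the example repo): the regular expression matches the `Iteration N, time = Ts, wps = W, train loss = L` format printed in the log, and the perplexity figure assumes the reported loss is per-word cross-entropy in nats, so that perplexity = exp(loss).

```python
import math
import re

# Matches the "Iteration N, time = Ts, wps = W, train loss = L" lines
# as they appear in the training log above.
LINE_RE = re.compile(
    r"Iteration\s+(\d+),\s*time\s*=\s*([\d.]+)s,\s*wps\s*=\s*(\d+),"
    r"\s*train loss\s*=\s*([\d.]+)"
)

def summarize(log_text):
    """Return (mean_wps, mean_loss, perplexity) over all parsed iterations."""
    rows = LINE_RE.findall(log_text)
    wps = [int(w) for _, _, w, _ in rows]
    loss = [float(l) for _, _, _, l in rows]
    mean_wps = sum(wps) / len(wps)
    mean_loss = sum(loss) / len(loss)
    return mean_wps, mean_loss, math.exp(mean_loss)

# Two lines copied from the log above as a smoke test.
sample = (
    "Iteration 6227, time = 0.51s, wps = 54693, train loss = 4.3742\n"
    "Iteration 6247, time = 0.96s, wps = 53493, train loss = 4.4245\n"
)
mean_wps, mean_loss, ppl = summarize(sample)
print(mean_wps, mean_loss, ppl)
```

Feeding it the whole run (for example via `tee train.log` on the training command) gives a single throughput number that is easier to compare across GPU configurations than eyeballing individual iterations.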
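The listing above shows the benchmark's training split as it normally ships: 99 shards, `news.en-00001-of-00100` through `news.en-00099-of-00100`. A partially interrupted download can silently drop shards, so a quick sanity check like the following sketch can be useful (the path is the one used in this session; adjust it to your dataset root):

```python
import os

# Path used in the session above; hypothetical for any other setup.
SHARD_DIR = ("data/1-billion-word-language-modeling-benchmark-r13output/"
             "training-monolingual.tokenized.shuffled")

# The training split is expected to contain shards 00001..00099.
expected = {f"news.en-{i:05d}-of-00100" for i in range(1, 100)}

def missing_shards(shard_dir):
    """Return the expected shard names not present in shard_dir."""
    try:
        present = set(os.listdir(shard_dir))
    except FileNotFoundError:
        present = set()
    return expected - present

print(len(expected))  # number of expected training shards
```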
root@3a240370f11d:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# exit
exit
[chibi@centos8 ~]$ cat /etc/redhat-release
CentOS Linux release 8.1.1911 (Core)
[chibi@centos8 ~]$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Wed_Oct_23_19:24:38_PDT_2019
Cuda compilation tools, release 10.2, V10.2.89
[chibi@centos8 ~]$ sensors
eth0-pci-4400
Adapter: PCI adapter
PHY Temperature: +49.5°C

k10temp-pci-00c3
Adapter: PCI adapter
Tdie: +31.2°C (high = +70.0°C)
Tctl: +31.2°C

iwlwifi-virtual-0
Adapter: Virtual device
temp1: +30.0°C

[chibi@centos8 ~]$ sudo hddtemp /dev/sda
[sudo] password for chibi:
/dev/sda: Samsung SSD 840 PRO Series: 27°C
[chibi@centos8 ~]$ nvidia-smi nvlink -c
GPU 0: TITAN RTX (UUID: GPU-7fb51c1d-c1e7-35cc-aad7-66971f05ddb7)
GPU 1: TITAN RTX (UUID: GPU-5a71d61e-f130-637a-b33d-4df555b0ed88)
GPU 2: GeForce RTX 2080 Ti (UUID: GPU-1ac935c2-557f-282e-14e5-3f749ffd63ac)
GPU 3: GeForce RTX 2080 Ti (UUID: GPU-13277ce5-e1e9-0cb1-8cee-6c9e6618e774)
[chibi@centos8 ~]$ cat /proc/cpuinfo
processor	: 0
vendor_id	: AuthenticAMD
cpu family	: 23
model		: 49
model name	: AMD Ryzen Threadripper 3970X 32-Core Processor
stepping	: 0
microcode	: 0x8301025
cpu MHz		: 3470.887
cache size	: 512 KB
physical id	: 0
siblings	: 64
core id		: 0
cpu cores	: 32
apicid		: 0
initial apicid	: 0
fpu		: yes
fpu_exception	: yes
cpuid level	: 16
wp		: yes
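The `/proc/cpuinfo` fields shown above (`physical id`, `core id`, `siblings`, `cpu cores`) are what tools like `lscpu` use to derive the CPU topology; on this Threadripper 3970X, 32 physical cores with SMT give 64 logical CPUs. A minimal sketch of that derivation, assuming the standard `/proc/cpuinfo` layout where each logical CPU's block starts at its `processor` line:

```python
def cpu_topology(cpuinfo_text):
    """Return (physical_cores, logical_cpus) parsed from /proc/cpuinfo text."""
    cores = set()   # distinct (physical id, core id) pairs
    logical = 0
    phys = None
    for line in cpuinfo_text.splitlines():
        key, _, val = line.partition(":")
        key, val = key.strip(), val.strip()
        if key == "processor":
            logical += 1    # each block describes one logical CPU
            phys = None
        elif key == "physical id":
            phys = val
        elif key == "core id":
            cores.add((phys, val))
    return len(cores), logical

# Two SMT siblings sharing one core of one socket:
sample = (
    "processor\t: 0\nphysical id\t: 0\ncore id\t: 0\n\n"
    "processor\t: 1\nphysical id\t: 0\ncore id\t: 0\n"
)
print(cpu_topology(sample))  # (1, 2)
```

Run against the real file (`cpu_topology(open("/proc/cpuinfo").read())`) on the machine above, this would be expected to report (32, 64).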