[chibi@centos7 ~]$ sudo nvidia-docker run --rm -ti nvcr.io/nvidia/tensorflow:19.04-py3
Unable to find image 'nvcr.io/nvidia/tensorflow:19.04-py3' locally
19.04-py3: Pulling from nvidia/tensorflow
34667c7e4631: Pulling fs layer
d18d76a881a4: Pulling fs layer
119c7358fbfc: Pulling fs layer
2aaf13f3eff0: Waiting
202fa0f8874b: Pulling fs layer
3b700a61ede6: Waiting
87e6ca450d3f: Waiting
a1e76dce1aec: Waiting
9b91fa2f9276: Pulling fs layer
b5877a9add73: Pulling fs layer
bab74df105f1: Pulling fs layer
534bbf505504: Waiting
4956bf3bbbb9: Waiting
f4371944c97d: Waiting
4615a735431d: Waiting
5db2639932b5: Pulling fs layer
629d5c9d75a4: Pulling fs layer
8071b94b5429: Waiting
6eb8eba2ad5a: Waiting
e32e86c15b8b: Pulling fs layer
08db5b51b243: Pulling fs layer
f71ce95fb406: Waiting
3498ed8c5685: Waiting
62819d8896c1: Pulling fs layer
34bc85bf8bef: Pulling fs layer
4a95ca3431c4: Pulling fs layer
41bc2d0a4d4d: Waiting
a2ceadc61854: Waiting
2d0c5308ff92: Waiting
a531832992b8: Waiting
b24a8fd8f2e1: Pulling fs layer
8d9313624ab7: Pulling fs layer
e5cafe011f22: Waiting
eca19a329cd4: Waiting
65ee50af0bcc: Waiting
5f60ec8c32f4: Waiting
d7dcb657fa13: Pull complete
1f6ef6575fbe: Pull complete
d1ef346a3015: Pull complete
4ef9cb404fd5: Pull complete
f6797f45a018: Pull complete
1d4380527325: Pull complete
965f2629db02: Pull complete
5debff4c8c0a: Pull complete
b3a3a9d82be6: Pull complete
eac05f20b729: Pull complete
3ce0a7f80167: Pull complete
2a21e34a5784: Pull complete
c1ccf19e258e: Pull complete
0b6ea9d0652b: Pull complete
307bc8c3f024: Pull complete
ca75fd593a79: Pull complete
0cd3cdca1af7: Pull complete
48e857e9d372: Pull complete
3264ea403ca9: Pull complete
Digest: sha256:aaebc136d5d50937362675c77afd908bd96cded68846f39163050a023c8a9851
Status: Downloaded newer image for nvcr.io/nvidia/tensorflow:19.04-py3

================
== TensorFlow ==
================

NVIDIA Release 19.04 (build 6132408)
TensorFlow Version 1.13.1

Container image Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
Copyright 2017-2019 The TensorFlow Authors. All rights reserved.

Various files include modifications (c) NVIDIA CORPORATION. All rights reserved.
NVIDIA modifications are covered by the license terms that apply to the underlying project or file.

NOTE: MOFED driver for multi-node communication was not detected.
      Multi-node communication performance may be reduced.

NOTE: The SHMEM allocation limit is set to the default of 64MB. This may be
      insufficient for TensorFlow. NVIDIA recommends the use of the following flags:
      nvidia-docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 ...
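The NOTE above gives the fix for the 64 MB SHMEM default: relaunch the container with a larger shared-memory segment and relaxed memlock/stack limits. A sketch of the adjusted invocation for this same image (the flags come from the banner; this command was not run in the original session):

    [chibi@centos7 ~]$ sudo nvidia-docker run --rm -ti --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorflow:19.04-py3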
root@93ad39173528:/workspace# ls
README.md  docker-examples  nvidia-examples
root@93ad39173528:/workspace# cd nvidia-examples
root@93ad39173528:/workspace/nvidia-examples# ls
NCF  bert  cnn  ssdv1.2  OpenSeq2Seq  big_lstm  gnmt_v2  tensorrt  UNet_Industrial  build_imagenet_data  resnet50v1.5
root@93ad39173528:/workspace/nvidia-examples# cd big_lstm
root@93ad39173528:/workspace/nvidia-examples/big_lstm# ls
1b_word_vocab.txt  data_utils_test.py  language_model_test.py  README.md  download_1b_words_data.sh  model_utils.py  __init__.py  hparams.py  run_utils.py  common.py  hparams_test.py  single_lm_train.py  data_utils.py  language_model.py  testdata
root@93ad39173528:/workspace/nvidia-examples/big_lstm# ./download_1b_words_data.sh
Please specify root of dataset directory: data
Success: dataset root dir validated
--2019-04-28 19:12:35--  http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
Resolving www.statmt.org (www.statmt.org)... 129.215.197.184
Connecting to www.statmt.org (www.statmt.org)|129.215.197.184|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1792209805 (1.7G) [application/x-gzip]
Saving to: ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’

1-billion-word-langu 100%[===================>]   1.67G  1.56MB/s    in 14m 19s

2019-04-28 19:26:55 (1.99 MB/s) - ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’ saved [1792209805/1792209805]

1-billion-word-language-modeling-benchmark-r13output/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00024-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00057-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00055-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00096-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00081-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00033-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00072-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00082-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00018-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00008-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00059-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00005-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00091-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00062-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00031-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00095-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00076-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00006-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00038-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00015-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00087-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00021-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00049-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00009-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00027-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00056-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00046-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00032-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00029-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00088-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00085-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00011-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00012-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00067-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00003-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00093-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00050-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00053-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00044-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00019-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00066-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00028-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00045-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00039-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00071-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00052-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00078-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00037-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00002-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00014-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00048-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00017-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00004-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00077-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00080-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00020-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00051-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00016-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00079-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00043-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00068-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00099-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00064-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00034-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00054-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00040-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00070-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00063-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00041-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00083-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00061-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00073-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00094-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00030-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00060-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00035-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00023-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00042-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00025-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00090-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00089-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00065-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00075-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00022-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00026-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00098-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00084-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00010-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00069-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00013-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00092-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00036-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00097-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00007-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00074-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00001-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00047-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00086-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00058-of-00100
1-billion-word-language-modeling-benchmark-r13output/.svn/
1-billion-word-language-modeling-benchmark-r13output/.svn/tmp/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/de102cd0c91cd19e6612f0840e68a2f20ba8134c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/deed1b75d3bd5cc36ae6aeb85d56680b892b7948.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/86c58db52fbf362c5bc329afc33b8805085fcb0d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/9f2882e21f860a83ad6ea8898ebab140974ed301.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/bcdbc523ee7488dc438cab869b6d5e236578dbfa.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/d2718bc26d0ee0a213d7d4add99a304cb5b39ede.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/c5b24f61479da923123d0394a188da922ea0359c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/116d6ea61730d8199127596b072e981338597779.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/b0e26559cfe641245584a9400b35ba28d64f1411.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/d3ae508e3bcb0e696dd70aecd052410f1f7afc1d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/9e148bd766e8805e0eb97eeae250433ec7a2e996.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/31b645a482e0b81fda3c567cada307c6fcf7ec80.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/da39a3ee5e6b4b0d3255bfef95601890afd80709.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/c1ed42c415ec884e591fb5c70d373da640a383b5.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/e37ba0f85e94073ccaced1eed7e4f5d737a25f49.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/entries
1-billion-word-language-modeling-benchmark-r13output/.svn/format
1-billion-word-language-modeling-benchmark-r13output/.svn/wc.db
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00015-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00031-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00027-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00010-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00033-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00042-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00046-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00037-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00029-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00013-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00002-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00048-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00006-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00030-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00025-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00039-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00008-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00020-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00001-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00034-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00044-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00045-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00016-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00004-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00035-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00038-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00009-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00024-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00022-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00021-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00032-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00011-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00049-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00041-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00019-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00023-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00040-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00014-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00007-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00017-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00012-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00018-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00003-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00028-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en-00000-of-00100
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00043-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00005-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00036-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00026-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00047-of-00050
1-billion-word-language-modeling-benchmark-r13output/README
Success! One billion words dataset ready at:
data/1-billion-word-language-modeling-benchmark-r13output/
Please pass this dir to single_lm_train.py via the --datadir option.
root@93ad39173528:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

*****HYPER PARAMETERS*****
{'state_size': 2048, 'batch_size': 128, 'num_steps': 20, 'emb_size': 512, 'num_layers': 1, 'max_grad_norm': 10.0, 'num_delayed_steps': 150, 'num_sampled': 8192, 'projected_size': 512, 'run_profiler': False, 'keep_prob': 0.9, 'num_gpus': 2, 'optimizer': 0, 'vocab_size': 793470, 'num_shards': 8, 'do_summaries': False, 'learning_rate': 0.2, 'average_params': True, 'max_time': 180}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
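The deprecation warnings here and just below each name their replacement. A minimal TF 1.13 sketch of those suggested idioms (illustrative only; this is not code from the big_lstm example):

    import tensorflow as tf

    x = tf.random.normal([128, 512])
    keep_prob = 0.9
    # dropout: pass rate = 1 - keep_prob instead of keep_prob
    y = tf.nn.dropout(x, rate=1 - keep_prob)
    # to_float / to_int32: use tf.cast with an explicit dtype
    y32 = tf.cast(y, tf.float32)
    # UniformUnitScaling: use variance_scaling with a uniform distribution
    init = tf.initializers.variance_scaling(distribution="uniform")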
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1556479659.1340022
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-04-28 19:27:39.644428: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2998230000 Hz
2019-04-28 19:27:39.645753: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x8061b70 executing computations on platform Host. Devices:
2019-04-28 19:27:39.645788: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): <undefined>, <undefined>
2019-04-28 19:27:39.935544: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x8061590 executing computations on platform CUDA. Devices:
2019-04-28 19:27:39.935626: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-04-28 19:27:39.935674: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-04-28 19:27:39.937295: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:05:00.0
totalMemory: 10.73GiB freeMemory: 10.42GiB
2019-04-28 19:27:39.938594: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:09:00.0
totalMemory: 10.73GiB freeMemory: 10.57GiB
2019-04-28 19:27:39.938703: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1
2019-04-28 19:27:40.821501: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-04-28 19:27:40.821553: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1
2019-04-28 19:27:40.821574: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N Y
2019-04-28 19:27:40.821584: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   Y N
2019-04-28 19:27:40.822587: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10053 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0, compute capability: 7.5)
2019-04-28 19:27:40.823125: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10197 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:09:00.0, compute capability: 7.5)
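In the log lines that follow, "wps" is words per second aggregated over both GPUs, and it can be reproduced from the hyperparameters above. A back-of-the-envelope check (my arithmetic, not output from the script):

    batch_size, num_steps, num_gpus = 128, 20, 2
    words_per_iteration = batch_size * num_steps * num_gpus   # 5,120 tokens per global step
    interval, elapsed = 20, 1.49                              # a typical 20-iteration log line takes ~1.49 s
    print(interval * words_per_iteration / elapsed)           # ~68,725, matching the ~68-70k wps figures below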
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00062-of-00100
Finished processing!
2019-04-28 19:28:03.596613: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1, time = 31.27s, wps = 164, train loss = 12.9863
Iteration 2, time = 8.73s, wps = 586, train loss = 12.9763
Iteration 3, time = 0.09s, wps = 54624, train loss = 12.8721
Iteration 4, time = 0.09s, wps = 57849, train loss = 12.7820
Iteration 5, time = 0.08s, wps = 63636, train loss = 20.4399
Iteration 6, time = 0.07s, wps = 69269, train loss = 13.9663
Iteration 7, time = 0.07s, wps = 70123, train loss = 13.5054
Iteration 8, time = 0.08s, wps = 67349, train loss = 12.0135
Iteration 9, time = 0.08s, wps = 67055, train loss = 32.0211
Iteration 20, time = 0.79s, wps = 71659, train loss = 10.5995
Iteration 40, time = 1.45s, wps = 70850, train loss = 9.6776
Iteration 60, time = 1.51s, wps = 67904, train loss = 9.6903
Iteration 80, time = 1.50s, wps = 68469, train loss = 8.7644
Iteration 100, time = 1.51s, wps = 67837, train loss = 9.0633
Iteration 120, time = 1.48s, wps = 69279, train loss = 7.9513
Iteration 140, time = 1.50s, wps = 68281, train loss = 7.9670
Iteration 160, time = 1.50s, wps = 68425, train loss = 7.5329
Iteration 180, time = 1.48s, wps = 69073, train loss = 6.9763
Iteration 200, time = 1.48s, wps = 69131, train loss = 6.7974
Iteration 220, time = 1.49s, wps = 68554, train loss = 6.8702
Iteration 240, time = 1.49s, wps = 68933, train loss = 6.5882
Iteration 260, time = 1.49s, wps = 68670, train loss = 6.4156
Iteration 280, time = 1.51s, wps = 67858, train loss = 6.4035
Iteration 300, time = 1.48s, wps = 69022, train loss = 6.2008
Iteration 320, time = 1.47s, wps = 69838, train loss = 6.1930
Iteration 340, time = 1.45s, wps = 70423, train loss = 6.1602
Iteration 360, time = 1.51s, wps = 67862, train loss = 6.2367
Iteration 380, time = 1.49s, wps = 68929, train loss = 6.2464
Iteration 400, time = 1.48s, wps = 69078, train loss = 6.1364
Iteration 420, time = 1.49s, wps = 68618, train loss = 6.0992
Iteration 440, time = 1.47s, wps = 69469, train loss = 6.1997
Iteration 460, time = 1.45s, wps = 70568, train loss = 6.0707
Iteration 480, time = 1.51s, wps = 67937, train loss = 5.9639
Iteration 500, time = 1.50s, wps = 68425, train loss = 5.8587
Iteration 520, time = 1.51s, wps = 67595, train loss = 5.9838
Iteration 540, time = 1.50s, wps = 68177, train loss = 6.0829
Iteration 560, time = 1.49s, wps = 68638, train loss = 5.9225
Iteration 580, time = 1.51s, wps = 67905, train loss = 5.8536
Iteration 600, time = 1.46s, wps = 69999, train loss = 5.9061
Iteration 620, time = 1.45s, wps = 70454, train loss = 5.8018
Iteration 640, time = 1.49s, wps = 68929, train loss = 5.9126
Iteration 660, time = 1.48s, wps = 69256, train loss = 5.7015
Iteration 680, time = 1.49s, wps = 68697, train loss = 5.8246
Iteration 700, time = 1.50s, wps = 68429, train loss = 5.8883
Iteration 720, time = 1.50s, wps = 68334, train loss = 5.7067
Iteration 740, time = 1.50s, wps = 68092, train loss = 5.7755
Iteration 760, time = 1.49s, wps = 68935, train loss = 5.7394
Iteration 780, time = 1.49s, wps = 68694, train loss = 5.6238
Iteration 800, time = 1.52s, wps = 67533, train loss = 5.6209
Iteration 820, time = 1.52s, wps = 67392, train loss = 5.5953
Iteration 840, time = 1.50s, wps = 68058, train loss = 5.6256
Iteration 860, time = 1.50s, wps = 68152, train loss = 5.6173
Iteration 880, time = 1.52s, wps = 67470, train loss = 5.6300
Iteration 900, time = 1.50s, wps = 68154, train loss = 5.5688
Iteration 920, time = 1.47s, wps = 69436, train loss = 5.5823
Iteration 940, time = 1.50s, wps = 68116, train loss = 5.5138
Iteration 960, time = 1.51s, wps = 67745, train loss = 5.5400
Iteration 980, time = 1.51s, wps = 67951, train loss = 5.4869
Iteration 1000, time = 1.48s, wps = 69239, train loss = 5.5289
Iteration 1020, time = 1.51s, wps = 67865, train loss = 5.5149
Iteration 1040, time = 1.51s, wps = 68010, train loss = 5.4629
Iteration 1060, time = 1.48s, wps = 69331, train loss = 5.5039
Iteration 1080, time = 1.47s, wps = 69430, train loss = 5.5216
Iteration 1100, time = 1.49s, wps = 68766, train loss = 5.4608
Iteration 1120, time = 1.50s, wps = 68374, train loss = 5.4768
Iteration 1140, time = 1.45s, wps = 70712, train loss = 5.4027
Iteration 1160, time = 1.52s, wps = 67563, train loss = 5.3587
Iteration 1180, time = 1.48s, wps = 69267, train loss = 5.3875
Iteration 1200, time = 1.49s, wps = 68874, train loss = 5.3652
Iteration 1220, time = 1.49s, wps = 68892, train loss = 5.3077
Iteration 1240, time = 1.44s, wps = 70926, train loss = 5.3540
Iteration 1260, time = 1.47s, wps = 69839, train loss = 5.3679
Iteration 1280, time = 1.50s, wps = 68351, train loss = 5.3590
Iteration 1300, time = 1.47s, wps = 69464, train loss = 5.3482
Iteration 1320, time = 1.49s, wps = 68938, train loss = 5.3999
Iteration 1340, time = 1.47s, wps = 69551, train loss = 5.3056
Iteration 1360, time = 1.49s, wps = 68580, train loss = 5.3076
Iteration 1380, time = 1.47s, wps = 69488, train loss = 5.2711
Iteration 1400, time = 1.46s, wps = 70203, train loss = 5.2761
Iteration 1420, time = 1.50s, wps = 68112, train loss = 5.2629
Iteration 1440, time = 1.49s, wps = 68706, train loss = 5.2241
Iteration 1460, time = 1.47s, wps = 69867, train loss = 5.1666
Iteration 1480, time = 1.50s, wps = 68124, train loss = 5.2835
Iteration 1500, time = 1.52s, wps = 67587, train loss = 5.2602
Iteration 1520, time = 1.46s, wps = 70017, train loss = 5.1122
Iteration 1540, time = 1.47s, wps = 69592, train loss = 5.3065
Iteration 1560, time = 1.49s, wps = 68931, train loss = 5.1757
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00020-of-00100
Finished processing!
Iteration 1580, time = 3.80s, wps = 26970, train loss = 5.2276
Iteration 1600, time = 1.47s, wps = 69694, train loss = 5.2452
Iteration 1620, time = 1.48s, wps = 69095, train loss = 5.1458
Iteration 1640, time = 1.48s, wps = 69363, train loss = 5.1395
Iteration 1660, time = 1.49s, wps = 68609, train loss = 5.2075
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m35.311s
user    8m8.155s
sys     1m2.900s
root@93ad39173528:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
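(The run above wall-clocked at 3m35s even though 'max_time': 180 caps training itself at 180 seconds; the extra ~35 s is presumably graph construction, input-file processing, and checkpoint writes. That is an inference from the hparams dump, not something the log states.)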
*****HYPER PARAMETERS*****
{'batch_size': 128, 'num_sampled': 8192, 'num_steps': 20, 'num_gpus': 2, 'optimizer': 0, 'average_params': True, 'do_summaries': False, 'num_layers': 1, 'max_time': 180, 'learning_rate': 0.2, 'run_profiler': False, 'num_shards': 8, 'projected_size': 512, 'num_delayed_steps': 150, 'max_grad_norm': 10.0, 'vocab_size': 793470, 'keep_prob': 0.9, 'emb_size': 512, 'state_size': 2048}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1556480291.038397
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
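The eight-way sharding visible in the variable dump below follows from 'num_shards': 8: each emb_* and softmax_w_* shard holds ceil(vocab_size / num_shards) rows. A quick consistency check (my arithmetic; the last shard is presumably padded):

    import math
    vocab_size, num_shards = 793470, 8
    rows = math.ceil(vocab_size / num_shards)   # 99184, matching the (99184, 512) shard shapes
    print(rows * num_shards - vocab_size)       # 2 padded rows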
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-04-28 19:38:11.554162: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2998230000 Hz
2019-04-28 19:38:11.555357: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x9842640 executing computations on platform Host. Devices:
2019-04-28 19:38:11.555391: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): <undefined>, <undefined>
2019-04-28 19:38:11.876587: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x9842060 executing computations on platform CUDA. Devices:
2019-04-28 19:38:11.876665: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-04-28 19:38:11.876743: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-04-28 19:38:11.878407: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:05:00.0
totalMemory: 10.73GiB freeMemory: 10.42GiB
2019-04-28 19:38:11.879692: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:09:00.0
totalMemory: 10.73GiB freeMemory: 10.57GiB
2019-04-28 19:38:11.879825: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1
2019-04-28 19:38:12.754637: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-04-28 19:38:12.754690: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1
2019-04-28 19:38:12.754710: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N Y
2019-04-28 19:38:12.754721: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   Y N
2019-04-28 19:38:12.755683: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10052 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0, compute capability: 7.5)
2019-04-28 19:38:12.756170: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10197 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:09:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00003-of-00100
Finished processing!
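Unlike the first run, this one restores state from the checkpoint left under --logdir=./logs: the two saver warnings above fire during that restore, and the iteration counter below resumes at 1678 rather than 1.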
2019-04-28 19:38:38.676153: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1678, time = 25.65s, wps = 200, train loss = 5.4681
Iteration 1679, time = 6.43s, wps = 796, train loss = 5.1977
Iteration 1680, time = 0.08s, wps = 61006, train loss = 5.3493
Iteration 1681, time = 0.17s, wps = 30328, train loss = 5.2724
Iteration 1682, time = 0.09s, wps = 58672, train loss = 5.1865
Iteration 1683, time = 0.08s, wps = 66615, train loss = 5.2235
Iteration 1684, time = 0.08s, wps = 67487, train loss = 5.2219
Iteration 1685, time = 0.08s, wps = 67076, train loss = 5.1617
Iteration 1686, time = 0.08s, wps = 67652, train loss = 5.1894
Iteration 1697, time = 0.83s, wps = 67461, train loss = 5.1521
Iteration 1717, time = 1.50s, wps = 68191, train loss = 5.1065
Iteration 1737, time = 1.48s, wps = 69347, train loss = 5.1865
Iteration 1757, time = 1.47s, wps = 69549, train loss = 5.1325
Iteration 1777, time = 1.46s, wps = 70122, train loss = 5.1627
Iteration 1797, time = 1.48s, wps = 69232, train loss = 5.1375
Iteration 1817, time = 1.49s, wps = 68603, train loss = 5.0801
Iteration 1837, time = 1.44s, wps = 71051, train loss = 5.1083
Iteration 1857, time = 1.46s, wps = 69991, train loss = 5.0360
Iteration 1877, time = 1.47s, wps = 69735, train loss = 5.0914
Iteration 1897, time = 1.52s, wps = 67342, train loss = 5.0525
Iteration 1917, time = 1.47s, wps = 69812, train loss = 5.0174
Iteration 1937, time = 1.49s, wps = 68613, train loss = 5.0319
Iteration 1957, time = 1.48s, wps = 69109, train loss = 5.1038
Iteration 1977, time = 1.51s, wps = 67976, train loss = 5.0915
Iteration 1997, time = 1.51s, wps = 67747, train loss = 5.0281
Iteration 2017, time = 1.49s, wps = 68667, train loss = 5.0631
Iteration 2037, time = 1.48s, wps = 69093, train loss = 4.9595
Iteration 2057, time = 1.51s, wps = 67753, train loss = 5.1087
Iteration 2077, time = 1.50s, wps = 68151, train loss = 5.0309
Iteration 2097, time = 1.47s, wps = 69679, train loss = 4.9908
Iteration 2117, time = 1.49s, wps = 68839, train loss = 4.9736
Iteration 2137, time = 1.50s, wps = 68335, train loss = 5.0061
Iteration 2157, time = 1.49s, wps = 68853, train loss = 5.0138
Iteration 2177, time = 1.52s, wps = 67465, train loss = 4.9954
Iteration 2197, time = 1.48s, wps = 69033, train loss = 5.0658
Iteration 2217, time = 1.49s, wps = 68559, train loss = 4.9606
Iteration 2237, time = 1.49s, wps = 68751, train loss = 4.9237
Iteration 2257, time = 1.47s, wps = 69550, train loss = 4.9624
Iteration 2277, time = 1.46s, wps = 70272, train loss = 4.9755
Iteration 2297, time = 1.50s, wps = 68237, train loss = 4.9003
Iteration 2317, time = 1.48s, wps = 69278, train loss = 4.9748
Iteration 2337, time = 1.47s, wps = 69562, train loss = 4.9684
Iteration 2357, time = 1.49s, wps = 68750, train loss = 5.0644
Iteration 2377, time = 1.48s, wps = 69342, train loss = 5.0115
Iteration 2397, time = 1.46s, wps = 69942, train loss = 4.9396
Iteration 2417, time = 1.52s, wps = 67362, train loss = 4.8809
Iteration 2437, time = 1.52s, wps = 67513, train loss = 4.9594
Iteration 2457, time = 1.53s, wps = 66863, train loss = 4.8467
Iteration 2477, time = 1.51s, wps = 67833, train loss = 4.8925
Iteration 2497, time = 1.50s, wps = 68454, train loss = 4.9379
Iteration 2517, time = 1.48s, wps = 69170, train loss = 4.8512
Iteration 2537, time = 1.48s, wps = 69367, train loss = 4.9573
Iteration 2557, time = 1.48s, wps = 69010, train loss = 4.8909
Iteration 2577, time = 1.51s, wps = 67703, train loss = 4.8641
Iteration 2597, time = 1.48s, wps = 69046, train loss = 4.8872
1.48s, wps = 69046, train loss = 4.8872 Iteration 2617, time = 1.50s, wps = 68073, train loss = 4.8883 Iteration 2637, time = 1.48s, wps = 69123, train loss = 4.9057 Iteration 2657, time = 1.47s, wps = 69427, train loss = 4.7686 Iteration 2677, time = 1.50s, wps = 68122, train loss = 4.8771 Iteration 2697, time = 1.49s, wps = 68554, train loss = 4.8242 Iteration 2717, time = 1.52s, wps = 67358, train loss = 4.8412 Iteration 2737, time = 1.52s, wps = 67405, train loss = 4.8838 Iteration 2757, time = 1.48s, wps = 69040, train loss = 4.8407 Iteration 2777, time = 1.51s, wps = 67598, train loss = 4.7908 Iteration 2797, time = 1.52s, wps = 67355, train loss = 4.8281 Iteration 2817, time = 1.47s, wps = 69456, train loss = 4.9066 Iteration 2837, time = 1.54s, wps = 66664, train loss = 4.8363 Iteration 2857, time = 1.48s, wps = 69411, train loss = 4.8351 Iteration 2877, time = 1.47s, wps = 69717, train loss = 4.8372 Iteration 2897, time = 1.52s, wps = 67470, train loss = 4.8538 Iteration 2917, time = 1.49s, wps = 68684, train loss = 4.8074 Iteration 2937, time = 1.45s, wps = 70536, train loss = 4.7461 Iteration 2957, time = 1.50s, wps = 68165, train loss = 4.8382 Iteration 2977, time = 1.48s, wps = 69004, train loss = 4.8192 Iteration 2997, time = 1.50s, wps = 68106, train loss = 4.7214 Iteration 3017, time = 1.52s, wps = 67409, train loss = 4.8687 Iteration 3037, time = 1.50s, wps = 68390, train loss = 4.7286 Iteration 3057, time = 1.55s, wps = 66057, train loss = 4.8307 Iteration 3077, time = 1.53s, wps = 67040, train loss = 4.8164 Iteration 3097, time = 1.53s, wps = 67067, train loss = 4.7752 Iteration 3117, time = 1.47s, wps = 69669, train loss = 4.7937 Iteration 3137, time = 1.51s, wps = 67811, train loss = 4.8250 Iteration 3157, time = 1.49s, wps = 68502, train loss = 4.7826 Iteration 3177, time = 1.51s, wps = 67725, train loss = 4.6920 Iteration 3197, time = 1.53s, wps = 66923, train loss = 4.8606 Iteration 3217, time = 1.52s, wps = 67275, train loss = 4.7368 Iteration 3237, time = 1.47s, wps = 69429, train loss = 4.8833 Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00016-of-00100 Finished processing! Iteration 3257, time = 3.99s, wps = 25639, train loss = 4.7925 Iteration 3277, time = 1.49s, wps = 68847, train loss = 4.7672 Iteration 3297, time = 1.49s, wps = 68699, train loss = 4.7789 Iteration 3317, time = 1.51s, wps = 68024, train loss = 4.7120 Iteration 3337, time = 1.48s, wps = 69353, train loss = 4.6939 Iteration 3357, time = 1.49s, wps = 68797, train loss = 4.6502 Iteration 3377, time = 1.47s, wps = 69581, train loss = 4.7234 Iteration 3397, time = 1.51s, wps = 67794, train loss = 4.8252 Iteration 3417, time = 1.48s, wps = 69407, train loss = 4.7822 Iteration 3437, time = 1.49s, wps = 68721, train loss = 4.6843 Iteration 3457, time = 1.48s, wps = 69140, train loss = 4.7446 Iteration 3477, time = 1.48s, wps = 68967, train loss = 4.6844 Iteration 3497, time = 1.52s, wps = 67228, train loss = 4.6897 Iteration 3517, time = 1.51s, wps = 67909, train loss = 4.6840 /usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened. warnings.warn("Attempting to use a closed FileWriter. 
" real 3m35.515s user 8m32.144s sys 1m3.367s root@93ad39173528:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0. For more information, please see: * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md * https://github.com/tensorflow/addons If you depend on functionality not listed there, please file an issue. *****HYPER PARAMETERS***** {'do_summaries': False, 'run_profiler': False, 'num_gpus': 2, 'max_time': 180, 'num_layers': 1, 'num_sampled': 8192, 'state_size': 2048, 'batch_size': 128, 'learning_rate': 0.2, 'average_params': True, 'vocab_size': 793470, 'max_grad_norm': 10.0, 'emb_size': 512, 'projected_size': 512, 'num_shards': 8, 'keep_prob': 0.9, 'num_delayed_steps': 150, 'optimizer': 0, 'num_steps': 20} ************************** WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`. WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version. Instructions for updating: Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead. WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. Current time: 1556480768.555226 ALL VARIABLES WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02. Instructions for updating: Please use tf.global_variables instead. 
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1556480768.555226
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
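The shard shapes listed above also show how large this model is: the 793,470-word vocabulary is split into num_shards = 8 pieces of 99,184 rows each (ceil(793470 / 8)), for both the embedding and the softmax weights. Summing the TRAINABLE VARIABLES shapes gives the parameter count; a small sketch using only the shapes printed above:

```python
# Shapes copied from the TRAINABLE VARIABLES listing above.
shapes = (
    [(99184, 512)] * 8                      # emb_0 .. emb_7
    + [(1024, 8192), (8192,), (2048, 512)]  # LSTMCell W_0, B, W_P_0
    + [(99184, 512)] * 8                    # softmax_w_0 .. softmax_w_7
    + [(793470,)]                           # softmax_b
)

total = 0
for shape in shapes:
    n = 1
    for dim in shape:
        n *= dim
    total += n

print("trainable parameters: {:,}".format(total))  # 822,754,174 (~0.82B)
```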
2019-04-28 19:46:09.083513: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2998230000 Hz
2019-04-28 19:46:09.084682: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x7b3fd20 executing computations on platform Host. Devices:
2019-04-28 19:46:09.084743: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): <undefined>, <undefined>
2019-04-28 19:46:09.376661: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x7b3f740 executing computations on platform CUDA. Devices:
2019-04-28 19:46:09.376754: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-04-28 19:46:09.376815: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-04-28 19:46:09.378382: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:05:00.0
totalMemory: 10.73GiB freeMemory: 10.42GiB
2019-04-28 19:46:09.379685: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:09:00.0
totalMemory: 10.73GiB freeMemory: 10.57GiB
2019-04-28 19:46:09.379808: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1
2019-04-28 19:46:10.259959: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-04-28 19:46:10.260021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1
2019-04-28 19:46:10.260041: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N Y
2019-04-28 19:46:10.260051: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   Y N
2019-04-28 19:46:10.261018: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10052 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0, compute capability: 7.5)
2019-04-28 19:46:10.261497: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10197 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:09:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00079-of-00100
Finished processing!
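The device lines above confirm TensorFlow created both GPUs with a "Y" (peer-to-peer) entry in the interconnect matrix. A minimal sketch, using the TF 1.x API available in this container, to print the same device inventory from Python:

```python
# TF 1.x (as shipped in the 19.04 container): list the devices the runtime sees.
from tensorflow.python.client import device_lib

for dev in device_lib.list_local_devices():
    if dev.device_type == "GPU":
        # e.g. "/device:GPU:0 -> ... name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0 ..."
        print(dev.name, "->", dev.physical_device_desc)
```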
2019-04-28 19:46:48.492395: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 3520, time = 32.60s, wps = 157, train loss = 4.9986
Iteration 3521, time = 6.79s, wps = 754, train loss = 4.7125
Iteration 3522, time = 0.08s, wps = 61817, train loss = 4.7437
Iteration 3523, time = 0.08s, wps = 63075, train loss = 4.7837
Iteration 3524, time = 0.08s, wps = 65140, train loss = 4.6921
Iteration 3525, time = 0.07s, wps = 72822, train loss = 4.7471
Iteration 3526, time = 0.08s, wps = 68242, train loss = 4.6718
Iteration 3527, time = 0.07s, wps = 70337, train loss = 4.6051
Iteration 3528, time = 0.07s, wps = 68391, train loss = 4.7329
Iteration 3539, time = 0.84s, wps = 66668, train loss = 4.7794
Iteration 3559, time = 1.48s, wps = 69180, train loss = 4.6539
Iteration 3579, time = 1.46s, wps = 70223, train loss = 4.7519
Iteration 3599, time = 1.48s, wps = 68978, train loss = 4.7198
Iteration 3619, time = 1.48s, wps = 69223, train loss = 4.6958
Iteration 3639, time = 1.50s, wps = 68275, train loss = 4.7010
Iteration 3659, time = 1.49s, wps = 68730, train loss = 4.7455
Iteration 3679, time = 1.51s, wps = 67733, train loss = 4.6900
Iteration 3699, time = 1.53s, wps = 66740, train loss = 4.6820
Iteration 3719, time = 1.45s, wps = 70482, train loss = 4.7090
Iteration 3739, time = 1.45s, wps = 70391, train loss = 4.7374
Iteration 3759, time = 1.46s, wps = 70054, train loss = 4.6932
Iteration 3779, time = 1.47s, wps = 69768, train loss = 4.5827
Iteration 3799, time = 1.46s, wps = 70031, train loss = 4.6614
Iteration 3819, time = 1.49s, wps = 68956, train loss = 4.6898
Iteration 3839, time = 1.49s, wps = 68569, train loss = 4.6994
Iteration 3859, time = 1.48s, wps = 69362, train loss = 4.6867
Iteration 3879, time = 1.51s, wps = 67871, train loss = 4.6470
Iteration 3899, time = 1.51s, wps = 67803, train loss = 4.7350
Iteration 3919, time = 1.47s, wps = 69485, train loss = 4.5656
Iteration 3939, time = 1.48s, wps = 69070, train loss = 4.6748
Iteration 3959, time = 1.51s, wps = 67970, train loss = 4.7354
Iteration 3979, time = 1.51s, wps = 67950, train loss = 4.6099
Iteration 3999, time = 1.51s, wps = 67868, train loss = 4.6609
Iteration 4019, time = 1.49s, wps = 68599, train loss = 4.6603
Iteration 4039, time = 1.52s, wps = 67414, train loss = 4.6524
Iteration 4059, time = 1.51s, wps = 68017, train loss = 4.6277
Iteration 4079, time = 1.49s, wps = 68608, train loss = 4.5948
Iteration 4099, time = 1.49s, wps = 68744, train loss = 4.5676
Iteration 4119, time = 1.51s, wps = 67969, train loss = 4.6847
Iteration 4139, time = 1.52s, wps = 67464, train loss = 4.5206
Iteration 4159, time = 1.48s, wps = 69200, train loss = 4.6039
Iteration 4179, time = 1.49s, wps = 68633, train loss = 4.5741
Iteration 4199, time = 1.45s, wps = 70852, train loss = 4.6036
Iteration 4219, time = 1.52s, wps = 67580, train loss = 4.6212
Iteration 4239, time = 1.52s, wps = 67397, train loss = 4.5473
Iteration 4259, time = 1.49s, wps = 68875, train loss = 4.5826
Iteration 4279, time = 1.50s, wps = 68199, train loss = 4.6117
Iteration 4299, time = 1.48s, wps = 69063, train loss = 4.5605
Iteration 4319, time = 1.48s, wps = 69013, train loss = 4.7154
Iteration 4339, time = 1.51s, wps = 67979, train loss = 4.5806
Iteration 4359, time = 1.52s, wps = 67546, train loss = 4.5191
Iteration 4379, time = 1.52s, wps = 67574, train loss = 4.6195
Iteration 4399, time = 1.48s, wps = 69101, train loss = 4.6537
Iteration 4419, time = 1.47s, wps = 69620, train loss = 4.5786
Iteration 4439, time = 1.47s, wps = 69522, train loss = 4.4702
Iteration 4459, time = 1.48s, wps = 69060, train loss = 4.5697
Iteration 4479, time = 1.49s, wps = 68956, train loss = 4.6499
Iteration 4499, time = 1.49s, wps = 68692, train loss = 4.5865
Iteration 4519, time = 1.50s, wps = 68190, train loss = 4.5531
Iteration 4539, time = 1.48s, wps = 69152, train loss = 4.5646
Iteration 4559, time = 1.50s, wps = 68131, train loss = 4.5530
Iteration 4579, time = 1.51s, wps = 67901, train loss = 4.6980
Iteration 4599, time = 1.49s, wps = 68569, train loss = 4.5510
Iteration 4619, time = 1.48s, wps = 69331, train loss = 4.5501
Iteration 4639, time = 1.50s, wps = 68488, train loss = 4.5234
Iteration 4659, time = 1.51s, wps = 67623, train loss = 4.5918
Iteration 4679, time = 1.50s, wps = 68379, train loss = 4.4687
Iteration 4699, time = 1.52s, wps = 67204, train loss = 4.5404
Iteration 4719, time = 1.52s, wps = 67256, train loss = 4.5362
Iteration 4739, time = 1.52s, wps = 67582, train loss = 4.5832
Iteration 4759, time = 1.52s, wps = 67429, train loss = 4.6507
Iteration 4779, time = 1.49s, wps = 68638, train loss = 4.4830
Iteration 4799, time = 1.47s, wps = 69437, train loss = 4.6590
Iteration 4819, time = 1.48s, wps = 69225, train loss = 4.5495
Iteration 4839, time = 1.46s, wps = 69972, train loss = 4.5475
Iteration 4859, time = 1.52s, wps = 67367, train loss = 4.5628
Iteration 4879, time = 1.49s, wps = 68746, train loss = 4.4944
Iteration 4899, time = 1.53s, wps = 67075, train loss = 4.6004
Iteration 4919, time = 1.50s, wps = 68075, train loss = 4.5055
Iteration 4939, time = 1.49s, wps = 68677, train loss = 4.4197
Iteration 4959, time = 1.50s, wps = 68207, train loss = 4.5679
Iteration 4979, time = 1.51s, wps = 68017, train loss = 4.5267
Iteration 4999, time = 1.53s, wps = 66894, train loss = 4.5588
Iteration 5019, time = 1.53s, wps = 67078, train loss = 4.5739
Iteration 5039, time = 1.50s, wps = 68407, train loss = 4.6326
Iteration 5059, time = 1.51s, wps = 67679, train loss = 4.5728
Iteration 5079, time = 1.47s, wps = 69539, train loss = 4.5522
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00071-of-00100
Finished processing!
Iteration 5099, time = 3.90s, wps = 26270, train loss = 4.4487
Iteration 5119, time = 1.54s, wps = 66311, train loss = 4.5074
Iteration 5139, time = 1.50s, wps = 68435, train loss = 4.4778
Iteration 5159, time = 1.50s, wps = 68483, train loss = 4.4587
Iteration 5179, time = 1.54s, wps = 66476, train loss = 4.5932
Iteration 5199, time = 1.45s, wps = 70653, train loss = 4.5380
Iteration 5219, time = 1.48s, wps = 69067, train loss = 4.5557
Iteration 5239, time = 1.50s, wps = 68103, train loss = 4.4592
Iteration 5259, time = 1.50s, wps = 68423, train loss = 4.5517
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "
real    3m31.521s
user    8m3.953s
sys     0m59.516s
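Both timed runs hold roughly 67,000-70,000 wps in steady state on the two RTX 2080 Ti cards (training stops after max_time = 180 seconds, which matches the ~3.5-minute wall times). A back-of-the-envelope epoch estimate from that figure; the token count is an assumption (the benchmark's training set is on the order of 0.8 billion words), not something printed in this log:

```python
tokens = 0.8e9   # assumed training-set size in words (not printed in this log)
wps = 68500.0    # typical steady-state throughput from the two runs above

print("~%.1f hours per epoch" % (tokens / wps / 3600))  # ~3.2 hours
```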
" real 3m31.521s user 8m3.953s sys 0m59.516s root@93ad39173528:/workspace/nvidia-examples/big_lstm# cat /etc/os-release NAME="Ubuntu" VERSION="16.04.6 LTS (Xenial Xerus)" ID=ubuntu ID_LIKE=debian PRETTY_NAME="Ubuntu 16.04.6 LTS" VERSION_ID="16.04" HOME_URL="http://www.ubuntu.com/" SUPPORT_URL="http://help.ubuntu.com/" BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/" VERSION_CODENAME=xenial UBUNTU_CODENAME=xenial root@93ad39173528:/workspace/nvidia-examples/big_lstm# nvcc -V nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2019 NVIDIA Corporation Built on Fri_Feb__8_19:08:17_PST_2019 Cuda compilation tools, release 10.1, V10.1.105 root@93ad39173528:/workspace/nvidia-examples/big_lstm# cd data root@93ad39173528:/workspace/nvidia-examples/big_lstm/data# ls 1-billion-word-language-modeling-benchmark-r13output root@93ad39173528:/workspace/nvidia-examples/big_lstm/data# cd 1-billion-word-language-modeling-benchmark-r13output root@93ad39173528:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# ls 1b_word_vocab.txt heldout-monolingual.tokenized.shuffled README training-monolingual.tokenized.shuffled root@93ad39173528:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# cd training-monolingual.tokenized.shuffled root@93ad39173528:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# ls news.en-00001-of-00100 news.en-00034-of-00100 news.en-00067-of-00100 news.en-00002-of-00100 news.en-00035-of-00100 news.en-00068-of-00100 news.en-00003-of-00100 news.en-00036-of-00100 news.en-00069-of-00100 news.en-00004-of-00100 news.en-00037-of-00100 news.en-00070-of-00100 news.en-00005-of-00100 news.en-00038-of-00100 news.en-00071-of-00100 news.en-00006-of-00100 news.en-00039-of-00100 news.en-00072-of-00100 news.en-00007-of-00100 news.en-00040-of-00100 news.en-00073-of-00100 news.en-00008-of-00100 news.en-00041-of-00100 news.en-00074-of-00100 news.en-00009-of-00100 news.en-00042-of-00100 news.en-00075-of-00100 news.en-00010-of-00100 news.en-00043-of-00100 news.en-00076-of-00100 news.en-00011-of-00100 news.en-00044-of-00100 news.en-00077-of-00100 news.en-00012-of-00100 news.en-00045-of-00100 news.en-00078-of-00100 news.en-00013-of-00100 news.en-00046-of-00100 news.en-00079-of-00100 news.en-00014-of-00100 news.en-00047-of-00100 news.en-00080-of-00100 news.en-00015-of-00100 news.en-00048-of-00100 news.en-00081-of-00100 news.en-00016-of-00100 news.en-00049-of-00100 news.en-00082-of-00100 news.en-00017-of-00100 news.en-00050-of-00100 news.en-00083-of-00100 news.en-00018-of-00100 news.en-00051-of-00100 news.en-00084-of-00100 news.en-00019-of-00100 news.en-00052-of-00100 news.en-00085-of-00100 news.en-00020-of-00100 news.en-00053-of-00100 news.en-00086-of-00100 news.en-00021-of-00100 news.en-00054-of-00100 news.en-00087-of-00100 news.en-00022-of-00100 news.en-00055-of-00100 news.en-00088-of-00100 news.en-00023-of-00100 news.en-00056-of-00100 news.en-00089-of-00100 news.en-00024-of-00100 news.en-00057-of-00100 news.en-00090-of-00100 news.en-00025-of-00100 news.en-00058-of-00100 news.en-00091-of-00100 news.en-00026-of-00100 news.en-00059-of-00100 news.en-00092-of-00100 news.en-00027-of-00100 news.en-00060-of-00100 news.en-00093-of-00100 news.en-00028-of-00100 news.en-00061-of-00100 news.en-00094-of-00100 news.en-00029-of-00100 news.en-00062-of-00100 news.en-00095-of-00100 news.en-00030-of-00100 news.en-00063-of-00100 news.en-00096-of-00100 
root@93ad39173528:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# exit
exit
[chibi@centos7 ~]$ cat /etc/redhat-release
CentOS Linux release 7.6.1810 (Core)
[chibi@centos7 ~]$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Fri_Feb__8_19:08:17_PST_2019
Cuda compilation tools, release 10.1, V10.1.105
[chibi@centos7 ~]$ sudo hddtemp /dev/sda
[sudo] password for chibi:
/dev/sda: TS128GSSD370S: 21°C
[chibi@centos7 ~]$ nvidia-smi nvlink -c
GPU 0: GeForce RTX 2080 Ti (UUID: GPU-1ac935c2-557f-282e-14e5-3f749ffd63ac)
         Link 0, P2P is supported: true
         Link 0, Access to system memory supported: true
         Link 0, P2P atomics supported: true
         Link 0, System memory atomics supported: true
         Link 0, SLI is supported: true
         Link 0, Link is supported: false
         Link 1, P2P is supported: true
         Link 1, Access to system memory supported: true
         Link 1, P2P atomics supported: true
         Link 1, System memory atomics supported: true
         Link 1, SLI is supported: true
         Link 1, Link is supported: false
GPU 1: GeForce RTX 2080 Ti (UUID: GPU-13277ce5-e1e9-0cb1-8cee-6c9e6618e774)
         Link 0, P2P is supported: true
         Link 0, Access to system memory supported: true
         Link 0, P2P atomics supported: true
         Link 0, System memory atomics supported: true
         Link 0, SLI is supported: true
         Link 0, Link is supported: false
         Link 1, P2P is supported: true
         Link 1, Access to system memory supported: true
         Link 1, P2P atomics supported: true
         Link 1, System memory atomics supported: true
         Link 1, SLI is supported: true
         Link 1, Link is supported: false
[chibi@centos7 ~]$
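The nvlink query above reports "P2P is supported: true" on every link of both cards while "Link is supported: false", which suggests the two GPUs reach each other via peer-to-peer over PCIe rather than an active NVLink connection. A small sketch that re-runs the same command shown above and condenses its output to those headline fields:

```python
import subprocess

# Re-run the capability query from the session above and keep the key lines.
out = subprocess.check_output(["nvidia-smi", "nvlink", "-c"]).decode()
for line in out.splitlines():
    stripped = line.strip()
    if stripped.startswith("GPU "):
        print(stripped)
    elif "P2P is supported" in stripped or "Link is supported" in stripped:
        print("    " + stripped)
```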