[chibi@centos7 ~]$ sudo nvidia-docker run --rm -ti nvcr.io/nvidia/tensorflow:19.04-py3
[sudo] password for chibi:

================
== TensorFlow ==
================

NVIDIA Release 19.04 (build 6132408)
TensorFlow Version 1.13.1

Container image Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
Copyright 2017-2019 The TensorFlow Authors.  All rights reserved.
Various files include modifications (c) NVIDIA CORPORATION.  All rights reserved.
NVIDIA modifications are covered by the license terms that apply to the underlying project or file.

NOTE: MOFED driver for multi-node communication was not detected.
      Multi-node communication performance may be reduced.

NOTE: The SHMEM allocation limit is set to the default of 64MB.  This may be
      insufficient for TensorFlow.  NVIDIA recommends the use of the following flags:
      nvidia-docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 ...

root@05d8b668f4dc:/workspace# ls
README.md  docker-examples  nvidia-examples
root@05d8b668f4dc:/workspace# cd nvidia-examples
root@05d8b668f4dc:/workspace/nvidia-examples# ls
NCF              bert                 cnn           ssdv1.2
OpenSeq2Seq      big_lstm             gnmt_v2       tensorrt
UNet_Industrial  build_imagenet_data  resnet50v1.5
root@05d8b668f4dc:/workspace/nvidia-examples# cd big_lstm
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm# ls
1b_word_vocab.txt  data_utils_test.py         language_model_test.py
README.md          download_1b_words_data.sh  model_utils.py
__init__.py        hparams.py                 run_utils.py
common.py          hparams_test.py            single_lm_train.py
data_utils.py      language_model.py          testdata
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm# ./download_1b_words_data.sh
Please specify root of dataset directory: data
Success: dataset root dir validated
--2019-05-12 16:25:09--  http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
Resolving www.statmt.org (www.statmt.org)... 129.215.197.184
Connecting to www.statmt.org (www.statmt.org)|129.215.197.184|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1792209805 (1.7G) [application/x-gzip]
Saving to: ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’

1-billion-word-langu 100%[===================>]   1.67G  2.30MB/s    in 12m 30s

2019-05-12 16:37:39 (2.28 MB/s) - ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’ saved [1792209805/1792209805]

1-billion-word-language-modeling-benchmark-r13output/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00024-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00057-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00055-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00096-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00081-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00033-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00072-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00082-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00018-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00008-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00059-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00005-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00091-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00062-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00031-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00095-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00076-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00006-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00038-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00015-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00087-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00021-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00049-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00009-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00027-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00056-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00046-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00032-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00029-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00088-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00085-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00011-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00012-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00067-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00003-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00093-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00050-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00053-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00044-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00019-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00066-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00028-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00045-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00039-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00071-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00052-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00078-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00037-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00002-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00014-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00048-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00017-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00004-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00077-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00080-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00020-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00051-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00016-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00079-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00043-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00068-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00099-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00064-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00034-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00054-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00040-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00070-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00063-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00041-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00083-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00061-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00073-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00094-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00030-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00060-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00035-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00023-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00042-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00025-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00090-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00089-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00065-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00075-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00022-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00026-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00098-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00084-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00010-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00069-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00013-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00092-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00036-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00097-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00007-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00074-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00001-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00047-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00086-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00058-of-00100
1-billion-word-language-modeling-benchmark-r13output/.svn/
1-billion-word-language-modeling-benchmark-r13output/.svn/tmp/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/de102cd0c91cd19e6612f0840e68a2f20ba8134c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/deed1b75d3bd5cc36ae6aeb85d56680b892b7948.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/86c58db52fbf362c5bc329afc33b8805085fcb0d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/9f2882e21f860a83ad6ea8898ebab140974ed301.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/bcdbc523ee7488dc438cab869b6d5e236578dbfa.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/d2718bc26d0ee0a213d7d4add99a304cb5b39ede.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/c5b24f61479da923123d0394a188da922ea0359c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/116d6ea61730d8199127596b072e981338597779.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/b0e26559cfe641245584a9400b35ba28d64f1411.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/d3ae508e3bcb0e696dd70aecd052410f1f7afc1d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/9e148bd766e8805e0eb97eeae250433ec7a2e996.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/31b645a482e0b81fda3c567cada307c6fcf7ec80.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/da39a3ee5e6b4b0d3255bfef95601890afd80709.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/c1ed42c415ec884e591fb5c70d373da640a383b5.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/e37ba0f85e94073ccaced1eed7e4f5d737a25f49.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/entries
1-billion-word-language-modeling-benchmark-r13output/.svn/format
1-billion-word-language-modeling-benchmark-r13output/.svn/wc.db
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00015-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00031-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00027-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00010-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00033-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00042-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00046-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00037-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00029-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00013-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00002-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00048-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00006-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00030-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00025-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00039-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00008-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00020-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00001-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00034-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00044-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00045-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00016-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00004-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00035-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00038-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00009-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00024-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00022-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00021-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00032-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00011-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00049-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00041-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00019-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00023-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00040-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00014-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00007-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00017-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00012-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00018-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00003-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00028-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en-00000-of-00100
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00043-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00005-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00036-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00026-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00047-of-00050
1-billion-word-language-modeling-benchmark-r13output/README
Success! One billion words dataset ready at:
data/1-billion-word-language-modeling-benchmark-r13output/
Please pass this dir to single_lm_train.py via the --datadir option.
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

*****HYPER PARAMETERS*****
{'max_grad_norm': 10.0, 'do_summaries': False, 'max_time': 180, 'run_profiler': False, 'vocab_size': 793470, 'learning_rate': 0.2, 'keep_prob': 0.9, 'num_gpus': 2, 'num_layers': 1, 'batch_size': 128, 'state_size': 2048, 'num_delayed_steps': 150, 'emb_size': 512, 'optimizer': 0, 'num_sampled': 8192, 'projected_size': 512, 'num_steps': 20, 'average_params': True, 'num_shards': 8}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1557679109.6628952
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-05-12 16:38:30.194339: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2998115000 Hz
2019-05-12 16:38:30.196575: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x90f8ca0 executing computations on platform Host. Devices:
2019-05-12 16:38:30.196609: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): ,
2019-05-12 16:38:30.459521: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x90f80e0 executing computations on platform CUDA. Devices:
2019-05-12 16:38:30.459584: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-05-12 16:38:30.459627: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-05-12 16:38:30.460988: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:05:00.0
totalMemory: 10.73GiB freeMemory: 10.45GiB
2019-05-12 16:38:30.462017: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:09:00.0
totalMemory: 10.73GiB freeMemory: 10.57GiB
2019-05-12 16:38:30.462111: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1
2019-05-12 16:38:32.264286: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-05-12 16:38:32.264348: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1
2019-05-12 16:38:32.264374: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N Y
2019-05-12 16:38:32.264387: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   Y N
2019-05-12 16:38:32.265508: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10076 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0, compute capability: 7.5)
2019-05-12 16:38:32.266015: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10197 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:09:00.0, compute capability: 7.5)
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00092-of-00100
Finished processing!
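While the run below is iterating, it is worth confirming from a second shell on the host that both boards are actually loaded. A minimal sketch, assuming the stock nvidia-smi tool is on the PATH (the 1-second refresh interval is arbitrary):

  # Refresh the GPU status table every second; both RTX 2080 Ti boards
  # should show high utilization and roughly 10 GB of allocated memory
  # while single_lm_train.py is running.
  watch -n 1 nvidia-smi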
2019-05-12 16:38:55.765727: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1, time = 26.58s, wps = 193, train loss = 12.9367
Iteration 2, time = 5.85s, wps = 875, train loss = 12.9459
Iteration 3, time = 0.08s, wps = 62865, train loss = 12.9096
Iteration 4, time = 0.08s, wps = 63363, train loss = 12.6671
Iteration 5, time = 0.08s, wps = 66701, train loss = 34.5032
Iteration 6, time = 0.07s, wps = 70856, train loss = 37.8558
Iteration 7, time = 0.07s, wps = 70477, train loss = 16.0550
Iteration 8, time = 0.07s, wps = 74191, train loss = 12.1442
Iteration 9, time = 0.07s, wps = 75811, train loss = 13.2132
Iteration 20, time = 0.82s, wps = 69009, train loss = 10.1174
Iteration 40, time = 1.51s, wps = 67779, train loss = 9.2251
Iteration 60, time = 1.50s, wps = 68196, train loss = 8.7014
Iteration 80, time = 1.51s, wps = 67915, train loss = 8.1260
Iteration 100, time = 1.48s, wps = 68971, train loss = 8.1156
Iteration 120, time = 1.48s, wps = 69254, train loss = 7.7862
Iteration 140, time = 1.48s, wps = 69011, train loss = 7.5972
Iteration 160, time = 1.50s, wps = 68239, train loss = 7.1534
Iteration 180, time = 1.50s, wps = 68486, train loss = 7.1691
Iteration 200, time = 1.46s, wps = 70228, train loss = 6.7381
Iteration 220, time = 1.50s, wps = 68185, train loss = 6.5438
Iteration 240, time = 1.51s, wps = 67740, train loss = 6.6239
Iteration 260, time = 1.48s, wps = 68981, train loss = 6.5643
Iteration 280, time = 1.49s, wps = 68520, train loss = 6.2892
Iteration 300, time = 1.55s, wps = 65907, train loss = 6.4941
Iteration 320, time = 1.51s, wps = 67956, train loss = 6.1577
Iteration 340, time = 1.49s, wps = 68792, train loss = 6.3690
Iteration 360, time = 1.49s, wps = 68810, train loss = 6.1950
Iteration 380, time = 1.51s, wps = 68016, train loss = 6.0865
Iteration 400, time = 1.48s, wps = 69247, train loss = 6.1103
Iteration 420, time = 1.49s, wps = 68521, train loss = 6.0225
Iteration 440, time = 1.51s, wps = 67865, train loss = 6.0892
Iteration 460, time = 1.48s, wps = 69042, train loss = 6.0406
Iteration 480, time = 1.51s, wps = 67826, train loss = 5.9354
Iteration 500, time = 1.52s, wps = 67570, train loss = 5.9295
Iteration 520, time = 1.50s, wps = 68455, train loss = 5.8885
Iteration 540, time = 1.52s, wps = 67545, train loss = 5.8644
Iteration 560, time = 1.51s, wps = 67598, train loss = 5.8511
Iteration 580, time = 1.52s, wps = 67559, train loss = 5.8748
Iteration 600, time = 1.51s, wps = 67770, train loss = 5.9451
Iteration 620, time = 1.51s, wps = 68028, train loss = 5.7838
Iteration 640, time = 1.50s, wps = 68303, train loss = 5.7940
Iteration 660, time = 1.50s, wps = 68365, train loss = 5.7574
Iteration 680, time = 1.51s, wps = 67735, train loss = 5.8003
Iteration 700, time = 1.50s, wps = 68300, train loss = 5.6973
Iteration 720, time = 1.50s, wps = 68363, train loss = 5.8025
Iteration 740, time = 1.49s, wps = 68710, train loss = 5.7190
Iteration 760, time = 1.51s, wps = 67686, train loss = 5.6856
Iteration 780, time = 1.51s, wps = 67821, train loss = 5.5872
Iteration 800, time = 1.50s, wps = 68408, train loss = 5.7019
Iteration 820, time = 1.50s, wps = 68055, train loss = 5.6281
Iteration 840, time = 1.53s, wps = 67004, train loss = 5.5804
Iteration 860, time = 1.52s, wps = 67196, train loss = 5.5821
Iteration 880, time = 1.50s, wps = 68239, train loss = 5.5826
Iteration 900, time = 1.54s, wps = 66425, train loss = 5.5191
Iteration 920, time = 1.48s, wps = 69008, train loss = 5.5374
Iteration 940, time = 1.50s, wps = 68457, train loss = 5.5223
Iteration 960, time = 1.51s, wps = 67805, train loss = 5.4527
Iteration 980, time = 1.52s, wps = 67268, train loss = 5.4273
Iteration 1000, time = 1.51s, wps = 67678, train loss = 5.4710
Iteration 1020, time = 1.47s, wps = 69594, train loss = 5.4785
Iteration 1040, time = 1.54s, wps = 66542, train loss = 5.4305
Iteration 1060, time = 1.50s, wps = 68336, train loss = 5.5232
Iteration 1080, time = 1.55s, wps = 66266, train loss = 5.4484
Iteration 1100, time = 1.52s, wps = 67531, train loss = 5.4686
Iteration 1120, time = 1.54s, wps = 66342, train loss = 5.2981
Iteration 1140, time = 1.51s, wps = 67637, train loss = 5.3207
Iteration 1160, time = 1.52s, wps = 67531, train loss = 5.3872
Iteration 1180, time = 1.55s, wps = 66245, train loss = 5.4118
Iteration 1200, time = 1.50s, wps = 68314, train loss = 5.4081
Iteration 1220, time = 1.52s, wps = 67151, train loss = 5.2897
Iteration 1240, time = 1.51s, wps = 67857, train loss = 5.4553
Iteration 1260, time = 1.50s, wps = 68116, train loss = 5.4054
Iteration 1280, time = 1.51s, wps = 67598, train loss = 5.3616
Iteration 1300, time = 1.50s, wps = 68108, train loss = 5.2642
Iteration 1320, time = 1.53s, wps = 67024, train loss = 5.2987
Iteration 1340, time = 1.50s, wps = 68167, train loss = 5.1633
Iteration 1360, time = 1.53s, wps = 66801, train loss = 5.3071
Iteration 1380, time = 1.50s, wps = 68352, train loss = 5.2425
Iteration 1400, time = 1.53s, wps = 66824, train loss = 5.2525
Iteration 1420, time = 1.50s, wps = 68358, train loss = 5.2758
Iteration 1440, time = 1.52s, wps = 67486, train loss = 5.2524
Iteration 1460, time = 1.48s, wps = 68999, train loss = 5.1954
Iteration 1480, time = 1.50s, wps = 68382, train loss = 5.1333
Iteration 1500, time = 1.50s, wps = 68063, train loss = 5.2567
Iteration 1520, time = 1.50s, wps = 68296, train loss = 5.2089
Iteration 1540, time = 1.52s, wps = 67340, train loss = 5.2400
Iteration 1560, time = 1.52s, wps = 67442, train loss = 5.2065
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00033-of-00100
Finished processing!
Iteration 1580, time = 3.69s, wps = 27771, train loss = 5.1460
Iteration 1600, time = 1.50s, wps = 68143, train loss = 5.1463
Iteration 1620, time = 1.53s, wps = 67062, train loss = 5.1295
Iteration 1640, time = 1.51s, wps = 67714, train loss = 5.2030
Iteration 1660, time = 1.52s, wps = 67531, train loss = 5.1596
Iteration 1680, time = 1.51s, wps = 67908, train loss = 5.2414
Iteration 1700, time = 1.50s, wps = 68208, train loss = 5.0802
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real	3m38.930s
user	8m22.699s
sys	0m57.323s
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
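Before the second run's log continues, the throughput numbers above are easy to sanity-check against the printed hyperparameters: each logged line covers 20 iterations, and each iteration consumes batch_size * num_steps words on each of the two GPUs. A quick check of that arithmetic (a sketch; the 1.50 s interval time is taken from a typical line above):

  # words per logged interval = batch_size * num_steps * num_gpus * 20 iterations
  #                           = 128 * 20 * 2 * 20 = 102400
  # at ~1.50 s per interval this is ~68,000 wps, matching the log.
  awk 'BEGIN { w = 128 * 20 * 2 * 20; printf "words/interval = %d, wps at 1.50s = %.0f\n", w, w / 1.50 }'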
*****HYPER PARAMETERS*****
{'num_sampled': 8192, 'optimizer': 0, 'num_shards': 8, 'batch_size': 128, 'num_delayed_steps': 150, 'projected_size': 512, 'num_gpus': 2, 'vocab_size': 793470, 'num_layers': 1, 'state_size': 2048, 'num_steps': 20, 'run_profiler': False, 'max_time': 180, 'do_summaries': False, 'max_grad_norm': 10.0, 'learning_rate': 0.2, 'average_params': True, 'emb_size': 512, 'keep_prob': 0.9}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1557679737.06865
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-05-12 16:48:57.583500: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2998115000 Hz
2019-05-12 16:48:57.584633: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x7e93f40 executing computations on platform Host. Devices:
2019-05-12 16:48:57.584670: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): ,
2019-05-12 16:48:57.885429: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x7a304a0 executing computations on platform CUDA. Devices:
2019-05-12 16:48:57.885501: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (0): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-05-12 16:48:57.885533: I tensorflow/compiler/xla/service/service.cc:168]   StreamExecutor device (1): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-05-12 16:48:57.886761: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:05:00.0
totalMemory: 10.73GiB freeMemory: 10.44GiB
2019-05-12 16:48:57.887728: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:09:00.0
totalMemory: 10.73GiB freeMemory: 10.57GiB
2019-05-12 16:48:57.887824: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1
2019-05-12 16:48:58.649257: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-05-12 16:48:58.649307: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1
2019-05-12 16:48:58.649325: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N Y
2019-05-12 16:48:58.649335: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   Y N
2019-05-12 16:48:58.650327: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10075 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0, compute capability: 7.5)
2019-05-12 16:48:58.650843: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10197 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:09:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00004-of-00100
Finished processing!
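Rather than eyeballing individual lines, a whole run can be summarized by capturing the output and averaging the reported wps values. A minimal sketch, assuming the output is saved with tee to a hypothetical train.log (the tail skips the warm-up iterations, which are dominated by startup cost):

  # Capture the run, stderr included, then average the reported wps values.
  python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 \
      --datadir=./data/1-billion-word-language-modeling-benchmark-r13output 2>&1 | tee train.log
  grep -o 'wps = [0-9]*' train.log | tail -n +10 | \
      awk '{ sum += $3; n++ } END { if (n) printf "mean wps over %d samples: %.0f\n", n, sum / n }'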
2019-05-12 16:49:36.893056: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 1714, time = 37.36s, wps = 137, train loss = 5.8272
Iteration 1715, time = 5.92s, wps = 866, train loss = 5.4580
Iteration 1716, time = 0.08s, wps = 66518, train loss = 5.1857
Iteration 1717, time = 0.08s, wps = 67824, train loss = 5.1126
Iteration 1718, time = 0.08s, wps = 64763, train loss = 5.0719
Iteration 1719, time = 0.07s, wps = 68745, train loss = 5.1539
Iteration 1720, time = 0.08s, wps = 68171, train loss = 5.1259
Iteration 1721, time = 0.07s, wps = 70347, train loss = 5.0723
Iteration 1722, time = 0.08s, wps = 65195, train loss = 5.0939
Iteration 1733, time = 0.81s, wps = 69474, train loss = 5.1390
Iteration 1753, time = 1.50s, wps = 68212, train loss = 5.1462
Iteration 1773, time = 1.48s, wps = 69342, train loss = 5.1423
Iteration 1793, time = 1.53s, wps = 66757, train loss = 5.0346
Iteration 1813, time = 1.53s, wps = 67137, train loss = 5.1324
Iteration 1833, time = 1.53s, wps = 67086, train loss = 5.1237
Iteration 1853, time = 1.51s, wps = 67611, train loss = 5.0522
Iteration 1873, time = 1.51s, wps = 67757, train loss = 5.0927
Iteration 1893, time = 1.46s, wps = 69899, train loss = 5.0898
Iteration 1913, time = 1.52s, wps = 67324, train loss = 4.9612
Iteration 1933, time = 1.51s, wps = 67954, train loss = 5.1070
Iteration 1953, time = 1.48s, wps = 69109, train loss = 4.9604
Iteration 1973, time = 1.54s, wps = 66651, train loss = 5.0381
Iteration 1993, time = 1.50s, wps = 68125, train loss = 5.0371
Iteration 2013, time = 1.52s, wps = 67569, train loss = 5.0419
Iteration 2033, time = 1.50s, wps = 68400, train loss = 5.0269
Iteration 2053, time = 1.48s, wps = 69221, train loss = 5.0411
Iteration 2073, time = 1.50s, wps = 68444, train loss = 5.0138
Iteration 2093, time = 1.51s, wps = 67837, train loss = 5.0201
Iteration 2113, time = 1.51s, wps = 67726, train loss = 5.0791
Iteration 2133, time = 1.52s, wps = 67481, train loss = 4.9693
Iteration 2153, time = 1.50s, wps = 68353, train loss = 4.9978
Iteration 2173, time = 1.47s, wps = 69683, train loss = 5.0539
Iteration 2193, time = 1.46s, wps = 69921, train loss = 4.9713
Iteration 2213, time = 1.50s, wps = 68222, train loss = 4.9091
Iteration 2233, time = 1.51s, wps = 67669, train loss = 5.0268
Iteration 2253, time = 1.50s, wps = 68169, train loss = 4.9639
Iteration 2273, time = 1.50s, wps = 68369, train loss = 4.8966
Iteration 2293, time = 1.51s, wps = 67893, train loss = 4.8770
Iteration 2313, time = 1.50s, wps = 68148, train loss = 4.8766
Iteration 2333, time = 1.49s, wps = 68882, train loss = 4.9516
Iteration 2353, time = 1.51s, wps = 68026, train loss = 4.9238
Iteration 2373, time = 1.51s, wps = 67605, train loss = 4.9772
Iteration 2393, time = 1.48s, wps = 69287, train loss = 5.0021
Iteration 2413, time = 1.48s, wps = 69088, train loss = 4.9183
Iteration 2433, time = 1.50s, wps = 68457, train loss = 4.7858
Iteration 2453, time = 1.54s, wps = 66511, train loss = 4.9774
Iteration 2473, time = 1.49s, wps = 68545, train loss = 4.8881
Iteration 2493, time = 1.50s, wps = 68266, train loss = 4.8943
Iteration 2513, time = 1.50s, wps = 68314, train loss = 4.8575
Iteration 2533, time = 1.52s, wps = 67321, train loss = 4.8360
Iteration 2553, time = 1.54s, wps = 66453, train loss = 4.9178
Iteration 2573, time = 1.50s, wps = 68436, train loss = 4.9138
Iteration 2593, time = 1.51s, wps = 67998, train loss = 4.8775
Iteration 2613, time = 1.54s, wps = 66522, train loss = 4.8774
Iteration 2633, time = 1.52s, wps = 67367, train loss = 4.9109
Iteration 2653, time = 1.54s, wps = 66505, train loss = 4.9067
Iteration 2673, time = 1.49s, wps = 68585, train loss = 4.8559
Iteration 2693, time = 1.52s, wps = 67148, train loss = 4.8023
Iteration 2713, time = 1.50s, wps = 68096, train loss = 4.8410
Iteration 2733, time = 1.50s, wps = 68134, train loss = 4.8630
Iteration 2753, time = 1.52s, wps = 67310, train loss = 4.8822
Iteration 2773, time = 1.53s, wps = 66999, train loss = 4.8283
Iteration 2793, time = 1.54s, wps = 66493, train loss = 4.9021
Iteration 2813, time = 1.51s, wps = 67610, train loss = 4.7766
Iteration 2833, time = 1.52s, wps = 67444, train loss = 4.8832
Iteration 2853, time = 1.50s, wps = 68350, train loss = 4.8297
Iteration 2873, time = 1.50s, wps = 68329, train loss = 4.8153
Iteration 2893, time = 1.52s, wps = 67188, train loss = 4.8397
Iteration 2913, time = 1.52s, wps = 67526, train loss = 4.8670
Iteration 2933, time = 1.49s, wps = 68918, train loss = 4.7837
Iteration 2953, time = 1.51s, wps = 67665, train loss = 4.7545
Iteration 2973, time = 1.51s, wps = 67989, train loss = 4.8754
Iteration 2993, time = 1.50s, wps = 68209, train loss = 4.7706
Iteration 3013, time = 1.51s, wps = 67988, train loss = 4.7963
Iteration 3033, time = 1.49s, wps = 68954, train loss = 4.7365
Iteration 3053, time = 1.50s, wps = 68451, train loss = 4.7726
Iteration 3073, time = 1.50s, wps = 68347, train loss = 4.8016
Iteration 3093, time = 1.50s, wps = 68480, train loss = 4.8211
Iteration 3113, time = 1.50s, wps = 68396, train loss = 4.8105
Iteration 3133, time = 1.49s, wps = 68663, train loss = 4.7615
Iteration 3153, time = 1.54s, wps = 66520, train loss = 4.8207
Iteration 3173, time = 1.52s, wps = 67541, train loss = 4.7235
Iteration 3193, time = 1.49s, wps = 68864, train loss = 4.7541
Iteration 3213, time = 1.49s, wps = 68906, train loss = 4.7428
Iteration 3233, time = 1.50s, wps = 68101, train loss = 4.7978
Iteration 3253, time = 1.51s, wps = 68026, train loss = 4.6765
Iteration 3273, time = 1.52s, wps = 67339, train loss = 4.7902
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00036-of-00100
Finished processing!
Iteration 3293, time = 4.02s, wps = 25501, train loss = 4.6394
Iteration 3313, time = 1.52s, wps = 67266, train loss = 4.7695
Iteration 3333, time = 1.50s, wps = 68066, train loss = 4.7283
Iteration 3353, time = 1.50s, wps = 68163, train loss = 4.7598
Iteration 3373, time = 1.51s, wps = 67869, train loss = 4.6797
Iteration 3393, time = 1.51s, wps = 67597, train loss = 4.6499
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real	3m35.115s
user	7m57.153s
sys	0m57.193s
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output

WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
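Before the third run's output continues, note that each invocation resumes from the checkpoint saved under --logdir rather than starting over: the second run picked up at iteration 1714, continuing roughly where the first run's ~1700 iterations stopped, and the checkpoint_exists warnings above come from that restore. The saved state can be inspected directly (assuming the default checkpoint layout TensorFlow's Supervisor writes):

  # The Supervisor periodically saves to --logdir; deleting this directory
  # would make the next run start again from iteration 0.
  ls -lh ./logs
  cat ./logs/checkpoint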
*****HYPER PARAMETERS*****
{'average_params': True, 'vocab_size': 793470, 'projected_size': 512, 'run_profiler': False, 'batch_size': 128, 'emb_size': 512, 'num_steps': 20, 'max_time': 180, 'num_gpus': 2, 'optimizer': 0, 'num_sampled': 8192, 'keep_prob': 0.9, 'num_layers': 1, 'num_shards': 8, 'max_grad_norm': 10.0, 'learning_rate': 0.2, 'state_size': 2048, 'num_delayed_steps': 150, 'do_summaries': False}
**************************
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:75: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/language_model.py:107: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Current time: 1557680384.9179375
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-05-12 16:59:45.445561: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2998115000 Hz
2019-05-12 16:59:45.446803: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x7f9a740 executing computations on platform Host. Devices:
2019-05-12 16:59:45.446838: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): ,
2019-05-12 16:59:45.726121: I tensorflow/compiler/xla/service/service.cc:161] XLA service 0x7f995a0 executing computations on platform CUDA. Devices:
2019-05-12 16:59:45.726206: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (0): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-05-12 16:59:45.726240: I tensorflow/compiler/xla/service/service.cc:168] StreamExecutor device (1): GeForce RTX 2080 Ti, Compute Capability 7.5
2019-05-12 16:59:45.727810: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:05:00.0
totalMemory: 10.73GiB freeMemory: 10.44GiB
2019-05-12 16:59:45.729092: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 1 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:09:00.0
totalMemory: 10.73GiB freeMemory: 10.57GiB
2019-05-12 16:59:45.729213: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0, 1
2019-05-12 16:59:46.596849: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-05-12 16:59:46.596915: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 1
2019-05-12 16:59:46.596935: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N Y
2019-05-12 16:59:46.596946: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 1:   Y N
2019-05-12 16:59:46.597923: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10075 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0, compute capability: 7.5)
2019-05-12 16:59:46.598382: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10197 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:09:00.0, compute capability: 7.5)
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00036-of-00100
Finished processing!
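Note: the "0: N Y / 1: Y N" interconnect matrix above means each GPU can reach the other peer-to-peer, and the LOCAL VARIABLES listing shows one model replica's state placed on each device (/gpu:0 and /gpu:1). A quick way to double-check what a TF 1.x build like this one enumerates, using the standard device_lib API:

    # List the devices this TF 1.x build sees (run inside the container):
    from tensorflow.python.client import device_lib

    for d in device_lib.list_local_devices():
        print(d.name, d.memory_limit, d.physical_device_desc)
    # Expect /device:GPU:0 and /device:GPU:1, both GeForce RTX 2080 Ti here.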
2019-05-12 17:00:01.532679: I tensorflow/stream_executor/dso_loader.cc:153] successfully opened CUDA library libcublas.so.10 locally
Iteration 3396, time = 9.41s, wps = 544, train loss = 4.9814
Iteration 3397, time = 6.21s, wps = 824, train loss = 4.6714
Iteration 3398, time = 0.09s, wps = 60090, train loss = 4.7631
Iteration 3399, time = 0.08s, wps = 64512, train loss = 4.6604
Iteration 3400, time = 0.08s, wps = 62563, train loss = 4.6765
Iteration 3401, time = 0.08s, wps = 67640, train loss = 4.6201
Iteration 3402, time = 0.07s, wps = 68550, train loss = 4.6567
Iteration 3403, time = 0.08s, wps = 66719, train loss = 4.7988
Iteration 3404, time = 0.07s, wps = 69041, train loss = 4.7656
Iteration 3415, time = 0.82s, wps = 68824, train loss = 4.8012
Iteration 3435, time = 1.48s, wps = 69278, train loss = 4.6857
Iteration 3455, time = 1.61s, wps = 63435, train loss = 4.6506
Iteration 3475, time = 1.50s, wps = 68187, train loss = 4.6886
Iteration 3495, time = 1.54s, wps = 66624, train loss = 4.6239
Iteration 3515, time = 1.53s, wps = 66900, train loss = 4.6664
Iteration 3535, time = 1.52s, wps = 67342, train loss = 4.6676
Iteration 3555, time = 1.52s, wps = 67250, train loss = 4.6807
Iteration 3575, time = 1.52s, wps = 67227, train loss = 4.6863
Iteration 3595, time = 1.50s, wps = 68488, train loss = 4.6462
Iteration 3615, time = 1.52s, wps = 67280, train loss = 4.6985
Iteration 3635, time = 1.50s, wps = 68387, train loss = 4.7427
Iteration 3655, time = 1.50s, wps = 68124, train loss = 4.7150
Iteration 3675, time = 1.50s, wps = 68330, train loss = 4.7057
Iteration 3695, time = 1.54s, wps = 66651, train loss = 4.6499
Iteration 3715, time = 1.53s, wps = 66864, train loss = 4.6463
Iteration 3735, time = 1.49s, wps = 68641, train loss = 4.6889
Iteration 3755, time = 1.52s, wps = 67577, train loss = 4.7585
Iteration 3775, time = 1.51s, wps = 67944, train loss = 4.6182
Iteration 3795, time = 1.53s, wps = 66978, train loss = 4.5949
Iteration 3815, time = 1.53s, wps = 66982, train loss = 4.5706
Iteration 3835, time = 1.49s, wps = 68776, train loss = 4.6921
Iteration 3855, time = 1.54s, wps = 66453, train loss = 4.6874
Iteration 3875, time = 1.51s, wps = 67850, train loss = 4.7217
Iteration 3895, time = 1.51s, wps = 67835, train loss = 4.6591
Iteration 3915, time = 1.53s, wps = 67012, train loss = 4.6584
Iteration 3935, time = 1.50s, wps = 68066, train loss = 4.5916
Iteration 3955, time = 1.50s, wps = 68302, train loss = 4.5109
Iteration 3975, time = 1.51s, wps = 67592, train loss = 4.6573
Iteration 3995, time = 1.51s, wps = 67623, train loss = 4.5561
Iteration 4015, time = 1.54s, wps = 66610, train loss = 4.5952
Iteration 4035, time = 1.53s, wps = 66879, train loss = 4.7038
Iteration 4055, time = 1.49s, wps = 68706, train loss = 4.5498
Iteration 4075, time = 1.52s, wps = 67224, train loss = 4.6475
Iteration 4095, time = 1.46s, wps = 69960, train loss = 4.5408
Iteration 4115, time = 1.50s, wps = 68068, train loss = 4.6599
Iteration 4135, time = 1.48s, wps = 68988, train loss = 4.6312
Iteration 4155, time = 1.52s, wps = 67426, train loss = 4.6273
Iteration 4175, time = 1.52s, wps = 67517, train loss = 4.5817
Iteration 4195, time = 1.51s, wps = 67826, train loss = 4.5849
Iteration 4215, time = 1.51s, wps = 67620, train loss = 4.5808
Iteration 4235, time = 1.53s, wps = 67012, train loss = 4.5894
Iteration 4255, time = 1.49s, wps = 68564, train loss = 4.5008
Iteration 4275, time = 1.51s, wps = 67996, train loss = 4.6892
Iteration 4295, time = 1.48s, wps = 68979, train loss = 4.5923
Iteration 4315, time = 1.49s, wps = 68511, train loss = 4.5490
Iteration 4335, time = 1.49s, wps = 68937, train loss = 4.5297
Iteration 4355, time = 1.55s, wps = 66069, train loss = 4.5737
Iteration 4375, time = 1.51s, wps = 67641, train loss = 4.6023
Iteration 4395, time = 1.50s, wps = 68379, train loss = 4.6590
Iteration 4415, time = 1.48s, wps = 69398, train loss = 4.6837
Iteration 4435, time = 1.52s, wps = 67382, train loss = 4.5605
Iteration 4455, time = 1.51s, wps = 67849, train loss = 4.6586
Iteration 4475, time = 1.50s, wps = 68054, train loss = 4.5804
Iteration 4495, time = 1.51s, wps = 68030, train loss = 4.5329
Iteration 4515, time = 1.52s, wps = 67205, train loss = 4.5226
Iteration 4535, time = 1.52s, wps = 67200, train loss = 4.6073
Iteration 4555, time = 1.54s, wps = 66573, train loss = 4.5955
Iteration 4575, time = 1.48s, wps = 69241, train loss = 4.6249
Iteration 4595, time = 1.48s, wps = 69091, train loss = 4.5829
Iteration 4615, time = 1.47s, wps = 69475, train loss = 4.6000
Iteration 4635, time = 1.50s, wps = 68185, train loss = 4.5099
Iteration 4655, time = 1.51s, wps = 67677, train loss = 4.5454
Iteration 4675, time = 1.52s, wps = 67430, train loss = 4.5449
Iteration 4695, time = 1.55s, wps = 66266, train loss = 4.4804
Iteration 4715, time = 1.54s, wps = 66358, train loss = 4.5858
Iteration 4735, time = 1.50s, wps = 68054, train loss = 4.5517
Iteration 4755, time = 1.47s, wps = 69624, train loss = 4.6299
Iteration 4775, time = 1.50s, wps = 68070, train loss = 4.5875
Iteration 4795, time = 1.50s, wps = 68250, train loss = 4.4897
Iteration 4815, time = 1.51s, wps = 67704, train loss = 4.5883
Iteration 4835, time = 1.53s, wps = 66917, train loss = 4.5835
Iteration 4855, time = 1.49s, wps = 68522, train loss = 4.4889
Iteration 4875, time = 1.51s, wps = 67892, train loss = 4.5637
Iteration 4895, time = 1.52s, wps = 67494, train loss = 4.3852
Iteration 4915, time = 1.53s, wps = 67003, train loss = 4.4903
Iteration 4935, time = 1.53s, wps = 67127, train loss = 4.5140
Iteration 4955, time = 1.50s, wps = 68429, train loss = 4.5081
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00038-of-00100
Finished processing!
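Note: each logged line covers 20 iterations, and every iteration consumes batch_size * num_steps tokens per GPU, so the reported wps is easy to sanity-check; the arithmetic below reproduces the steady-state ~68k words/sec. The slow line right after each "Finished processing!" (iteration 4975 below, at 4.05 s) is the input pipeline pausing to load the next news.en shard, not a training regression.

    # Sanity check of the logged throughput (assumption: 20 iterations per
    # log line, batch_size * num_steps tokens per GPU per iteration):
    batch_size, num_steps, num_gpus = 128, 20, 2   # from the HYPER PARAMETERS dump
    iters_per_line, elapsed = 20, 1.50             # a typical steady-state line
    words = iters_per_line * batch_size * num_steps * num_gpus
    print(round(words / elapsed))                  # 68267 -- matches wps ~ 68000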
Iteration 4975, time = 4.05s, wps = 25282, train loss = 4.4494
Iteration 4995, time = 1.51s, wps = 67937, train loss = 4.5452
Iteration 5015, time = 1.52s, wps = 67447, train loss = 4.5508
Iteration 5035, time = 1.49s, wps = 68715, train loss = 4.5233
Iteration 5055, time = 1.49s, wps = 68706, train loss = 4.5380
Iteration 5075, time = 1.52s, wps = 67490, train loss = 4.5989
Iteration 5095, time = 1.51s, wps = 67720, train loss = 4.4320
Iteration 5115, time = 1.52s, wps = 67336, train loss = 4.5107
Iteration 5135, time = 1.54s, wps = 66678, train loss = 4.4097
Iteration 5155, time = 1.51s, wps = 67884, train loss = 4.5606
Iteration 5175, time = 1.49s, wps = 68645, train loss = 4.5850
Iteration 5195, time = 1.50s, wps = 68317, train loss = 4.5613
Iteration 5215, time = 1.48s, wps = 69035, train loss = 4.5529
Iteration 5235, time = 1.52s, wps = 67242, train loss = 4.4985
Iteration 5255, time = 1.50s, wps = 68463, train loss = 4.4701
Iteration 5275, time = 1.50s, wps = 68444, train loss = 4.4979
Iteration 5295, time = 1.52s, wps = 67292, train loss = 4.5197
Iteration 5315, time = 1.52s, wps = 67181, train loss = 4.5234
Iteration 5335, time = 1.53s, wps = 66925, train loss = 4.4944
Iteration 5355, time = 1.54s, wps = 66284, train loss = 4.6056
Iteration 5375, time = 1.53s, wps = 66947, train loss = 4.4786
Iteration 5395, time = 1.52s, wps = 67480, train loss = 4.4881
Iteration 5415, time = 1.50s, wps = 68070, train loss = 4.5139
/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/writer/writer.py:386: UserWarning: Attempting to use a closed FileWriter. The operation will be a noop unless the FileWriter is explicitly reopened.
  warnings.warn("Attempting to use a closed FileWriter. "

real    3m30.710s
user    9m29.311s
sys     1m3.969s
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm# cat /etc/os-release
NAME="Ubuntu"
VERSION="16.04.6 LTS (Xenial Xerus)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 16.04.6 LTS"
VERSION_ID="16.04"
HOME_URL="http://www.ubuntu.com/"
SUPPORT_URL="http://help.ubuntu.com/"
BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/"
VERSION_CODENAME=xenial
UBUNTU_CODENAME=xenial
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm# nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Fri_Feb__8_19:08:17_PST_2019
Cuda compilation tools, release 10.1, V10.1.105
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm# cd data
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm/data# ls
1-billion-word-language-modeling-benchmark-r13output
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm/data# cd 1-billion-word-language-modeling-benchmark-r13output
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# ls
1b_word_vocab.txt  heldout-monolingual.tokenized.shuffled  README  training-monolingual.tokenized.shuffled
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# cd training-monolingual.tokenized.shuffled
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# ls
news.en-00001-of-00100  news.en-00034-of-00100  news.en-00067-of-00100
news.en-00002-of-00100  news.en-00035-of-00100  news.en-00068-of-00100
news.en-00003-of-00100  news.en-00036-of-00100  news.en-00069-of-00100
news.en-00004-of-00100  news.en-00037-of-00100  news.en-00070-of-00100
news.en-00005-of-00100  news.en-00038-of-00100  news.en-00071-of-00100
news.en-00006-of-00100  news.en-00039-of-00100  news.en-00072-of-00100
news.en-00007-of-00100  news.en-00040-of-00100  news.en-00073-of-00100
news.en-00008-of-00100  news.en-00041-of-00100  news.en-00074-of-00100
news.en-00009-of-00100  news.en-00042-of-00100  news.en-00075-of-00100
news.en-00010-of-00100  news.en-00043-of-00100  news.en-00076-of-00100
news.en-00011-of-00100  news.en-00044-of-00100  news.en-00077-of-00100
news.en-00012-of-00100  news.en-00045-of-00100  news.en-00078-of-00100
news.en-00013-of-00100  news.en-00046-of-00100  news.en-00079-of-00100
news.en-00014-of-00100  news.en-00047-of-00100  news.en-00080-of-00100
news.en-00015-of-00100  news.en-00048-of-00100  news.en-00081-of-00100
news.en-00016-of-00100  news.en-00049-of-00100  news.en-00082-of-00100
news.en-00017-of-00100  news.en-00050-of-00100  news.en-00083-of-00100
news.en-00018-of-00100  news.en-00051-of-00100  news.en-00084-of-00100
news.en-00019-of-00100  news.en-00052-of-00100  news.en-00085-of-00100
news.en-00020-of-00100  news.en-00053-of-00100  news.en-00086-of-00100
news.en-00021-of-00100  news.en-00054-of-00100  news.en-00087-of-00100
news.en-00022-of-00100  news.en-00055-of-00100  news.en-00088-of-00100
news.en-00023-of-00100  news.en-00056-of-00100  news.en-00089-of-00100
news.en-00024-of-00100  news.en-00057-of-00100  news.en-00090-of-00100
news.en-00025-of-00100  news.en-00058-of-00100  news.en-00091-of-00100
news.en-00026-of-00100  news.en-00059-of-00100  news.en-00092-of-00100
news.en-00027-of-00100  news.en-00060-of-00100  news.en-00093-of-00100
news.en-00028-of-00100  news.en-00061-of-00100  news.en-00094-of-00100
news.en-00029-of-00100  news.en-00062-of-00100  news.en-00095-of-00100
news.en-00030-of-00100  news.en-00063-of-00100  news.en-00096-of-00100
news.en-00031-of-00100  news.en-00064-of-00100  news.en-00097-of-00100
news.en-00032-of-00100  news.en-00065-of-00100  news.en-00098-of-00100
news.en-00033-of-00100  news.en-00066-of-00100  news.en-00099-of-00100
root@05d8b668f4dc:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# exit
exit
[chibi@centos7 ~]$ cat /etc/redhat-release
CentOS Linux release 7.6.1810 (Core)
[chibi@centos7 ~]$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Fri_Feb__8_19:08:17_PST_2019
Cuda compilation tools, release 10.1, V10.1.105
[chibi@centos7 ~]$ nvidia-smi nvlink -c
GPU 0: GeForce RTX 2080 Ti (UUID: GPU-1ac935c2-557f-282e-14e5-3f749ffd63ac)
         Link 0, P2P is supported: true
         Link 0, Access to system memory supported: true
         Link 0, P2P atomics supported: true
         Link 0, System memory atomics supported: true
         Link 0, SLI is supported: true
         Link 0, Link is supported: false
         Link 1, P2P is supported: true
         Link 1, Access to system memory supported: true
         Link 1, P2P atomics supported: true
         Link 1, System memory atomics supported: true
         Link 1, SLI is supported: true
         Link 1, Link is supported: false
GPU 1: GeForce RTX 2080 Ti (UUID: GPU-13277ce5-e1e9-0cb1-8cee-6c9e6618e774)
         Link 0, P2P is supported: true
         Link 0, Access to system memory supported: true
         Link 0, P2P atomics supported: true
         Link 0, System memory atomics supported: true
         Link 0, SLI is supported: true
         Link 0, Link is supported: false
         Link 1, P2P is supported: true
         Link 1, Access to system memory supported: true
         Link 1, P2P atomics supported: true
         Link 1, System memory atomics supported: true
         Link 1, SLI is supported: true
         Link 1, Link is supported: false
[chibi@centos7 ~]$ sudo hddtemp /dev/sda
[sudo] password for chibi:
/dev/sda: TS128GSSD370S: 25°C
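Note: the container userland (Ubuntu 16.04.6) and the CentOS 7.6 host report the same CUDA 10.1 toolkit (V10.1.105), so nvcc output matches on both sides. In the nvidia-smi nvlink -c output above, every link reports "Link is supported: false", which suggests no NVLink bridge is installed between the two cards, so the peer-to-peer path enabled by the earlier "N Y / Y N" matrix would run over PCIe. One way to check the interconnect topology (the interpretation here is ours, not from the log; nvidia-smi topo -m is a standard NVIDIA tool):

    # Print the GPU interconnect topology matrix from Python:
    import subprocess

    out = subprocess.check_output(["nvidia-smi", "topo", "-m"],
                                  universal_newlines=True)
    print(out)   # PIX/PHB/NODE entries indicate PCIe paths; NV# would indicate NVLink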
[chibi@centos7 ~]$