chibi@1604:~$ sudo nvidia-docker run --rm -ti nvcr.io/nvidia/tensorflow:18.09-py3
[sudo] password for chibi:

================
== TensorFlow ==
================

NVIDIA Release 18.09 (build 687558)

Container image Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
Copyright 2017 The TensorFlow Authors.  All rights reserved.

Various files include modifications (c) NVIDIA CORPORATION.  All rights reserved.
NVIDIA modifications are covered by the license terms that apply to the
underlying project or file.

NOTE: The SHMEM allocation limit is set to the default of 64MB.  This may be
      insufficient for TensorFlow.  NVIDIA recommends the use of the following flags:
      nvidia-docker run --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 ...

root@79928109e20f:/workspace# ls
README.md  docker-examples  nvidia-examples
root@79928109e20f:/workspace# cd nvidia-examples
root@79928109e20f:/workspace/nvidia-examples# ls
OpenSeq2Seq  big_lstm  build_imagenet_data  cnn  tftrt
root@79928109e20f:/workspace/nvidia-examples# cd big_lstm
root@79928109e20f:/workspace/nvidia-examples/big_lstm# ls
1b_word_vocab.txt  data_utils_test.py         language_model_test.py
README.md          download_1b_words_data.sh  model_utils.py
__init__.py        hparams.py                 run_utils.py
common.py          hparams_test.py            single_lm_train.py
data_utils.py      language_model.py          testdata
root@79928109e20f:/workspace/nvidia-examples/big_lstm# ./download_1b_words_data.sh
Please specify root of dataset directory: data
Success: dataset root dir validated
--2019-03-29 11:09:03--  http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
Resolving www.statmt.org (www.statmt.org)... 129.215.197.184
Connecting to www.statmt.org (www.statmt.org)|129.215.197.184|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1792209805 (1.7G) [application/x-gzip]
Saving to: ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’

1-billion-word-langu 100%[===================>]   1.67G  1.56MB/s    in 23m 48s

2019-03-29 11:32:52 (1.20 MB/s) - ‘1-billion-word-language-modeling-benchmark-r13output.tar.gz’ saved [1792209805/1792209805]

1-billion-word-language-modeling-benchmark-r13output/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00024-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00057-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00055-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00096-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00081-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00033-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00072-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00082-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00018-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00008-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00059-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00005-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00091-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00062-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00031-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00095-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00076-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00006-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00038-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00015-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00087-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00021-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00049-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00009-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00027-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00056-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00046-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00032-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00029-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00088-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00085-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00011-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00012-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00067-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00003-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00093-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00050-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00053-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00044-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00019-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00066-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00028-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00045-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00039-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00071-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00052-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00078-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00037-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00002-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00014-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00048-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00017-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00004-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00077-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00080-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00020-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00051-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00016-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00079-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00043-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00068-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00099-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00064-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00034-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00054-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00040-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00070-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00063-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00041-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00083-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00061-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00073-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00094-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00030-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00060-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00035-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00023-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00042-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00025-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00090-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00089-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00065-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00075-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00022-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00026-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00098-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00084-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00010-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00069-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00013-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00092-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00036-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00097-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00007-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00074-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00001-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00047-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00086-of-00100
1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00058-of-00100
1-billion-word-language-modeling-benchmark-r13output/.svn/
1-billion-word-language-modeling-benchmark-r13output/.svn/tmp/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/de102cd0c91cd19e6612f0840e68a2f20ba8134c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/de/deed1b75d3bd5cc36ae6aeb85d56680b892b7948.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/86/86c58db52fbf362c5bc329afc33b8805085fcb0d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9f/9f2882e21f860a83ad6ea8898ebab140974ed301.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/bc/bcdbc523ee7488dc438cab869b6d5e236578dbfa.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d2/d2718bc26d0ee0a213d7d4add99a304cb5b39ede.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c5/c5b24f61479da923123d0394a188da922ea0359c.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/11/116d6ea61730d8199127596b072e981338597779.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/b0/b0e26559cfe641245584a9400b35ba28d64f1411.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/d3/d3ae508e3bcb0e696dd70aecd052410f1f7afc1d.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/9e/9e148bd766e8805e0eb97eeae250433ec7a2e996.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/31/31b645a482e0b81fda3c567cada307c6fcf7ec80.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/da/da39a3ee5e6b4b0d3255bfef95601890afd80709.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/c1/c1ed42c415ec884e591fb5c70d373da640a383b5.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/
1-billion-word-language-modeling-benchmark-r13output/.svn/pristine/e3/e37ba0f85e94073ccaced1eed7e4f5d737a25f49.svn-base
1-billion-word-language-modeling-benchmark-r13output/.svn/entries
1-billion-word-language-modeling-benchmark-r13output/.svn/format
1-billion-word-language-modeling-benchmark-r13output/.svn/wc.db
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00015-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00031-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00027-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00010-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00033-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00042-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00046-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00037-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00029-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00013-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00002-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00048-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00006-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00030-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00025-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00039-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00008-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00020-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00001-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00034-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00044-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00045-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00016-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00004-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00035-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00038-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00009-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00024-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00022-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00021-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00032-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00011-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00049-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00041-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00019-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00023-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00040-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00014-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00007-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00017-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00012-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00018-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00003-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00028-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en-00000-of-00100
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00043-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00005-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00036-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00026-of-00050
1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00047-of-00050
1-billion-word-language-modeling-benchmark-r13output/README
Success! One billion words dataset ready at:
data/1-billion-word-language-modeling-benchmark-r13output/
Please pass this dir to single_lm_train.py via the --datadir option.
root@79928109e20f:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output
*****HYPER PARAMETERS*****
{'max_grad_norm': 10.0, 'num_sampled': 8192, 'vocab_size': 793470, 'num_delayed_steps': 150, 'average_params': True, 'num_layers': 1, 'num_steps': 20, 'optimizer': 0, 'num_shards': 8, 'run_profiler': False, 'emb_size': 512, 'batch_size': 128, 'max_time': 180, 'learning_rate': 0.2, 'state_size': 2048, 'keep_prob': 0.9, 'num_gpus': 2, 'projected_size': 512, 'do_summaries': False}
**************************
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
Current time: 1553859247.1953483
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-03-29 11:34:07.978556: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1405] Found device 0 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:05:00.0
totalMemory: 10.73GiB freeMemory: 10.26GiB
2019-03-29 11:34:08.066011: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1405] Found device 1 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:09:00.0
totalMemory: 10.73GiB freeMemory: 10.57GiB
2019-03-29 11:34:08.066059: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1484] Adding visible gpu devices: 0, 1
2019-03-29 11:34:09.283770: I tensorflow/core/common_runtime/gpu/gpu_device.cc:965] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-03-29 11:34:09.283809: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971]      0 1
2019-03-29 11:34:09.283817: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] 0:   N Y
2019-03-29 11:34:09.283822: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] 1:   Y N
2019-03-29 11:34:09.285998: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1097] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 9902 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0, compute capability: 7.5)
2019-03-29 11:34:09.389717: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1097] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10207 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:09:00.0, compute capability: 7.5)
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00056-of-00100
Finished processing!
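The shapes in the variable listing above follow directly from the hyperparameter dump: the vocabulary is split into num_shards embedding and softmax shards, and the LSTM cell sees the embedding concatenated with its own recurrent projection. A quick sanity check of that arithmetic, in plain Python with the printed values:

    # Arithmetic behind the variable shapes above (values from *****HYPER PARAMETERS*****).
    vocab_size, num_shards = 793470, 8
    emb_size, projected_size, state_size = 512, 512, 2048

    print((vocab_size + num_shards - 1) // num_shards)  # 99184: rows per emb_*/softmax_w_* shard
    print(emb_size + projected_size)                    # 1024: input width of lstm_0/LSTMCell/W_0
    print(4 * state_size)                               # 8192: W_0 output width and bias B (four LSTM gates)
    print((state_size, projected_size))                 # (2048, 512): the projection W_P_0
    print(state_size + projected_size)                  # 2560: state_*_0 width (cell state + projected output)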
Iteration 1, time = 11.82s, wps = 433, train loss = 12.9820
Iteration 2, time = 4.31s, wps = 1187, train loss = 12.9772
Iteration 3, time = 0.09s, wps = 55712, train loss = 12.9312
Iteration 4, time = 0.08s, wps = 62061, train loss = 12.7381
Iteration 5, time = 0.08s, wps = 62737, train loss = 23.5511
Iteration 6, time = 0.08s, wps = 63445, train loss = 20.2303
Iteration 7, time = 0.08s, wps = 65897, train loss = 14.0865
Iteration 8, time = 0.08s, wps = 63441, train loss = 12.1315
Iteration 9, time = 0.08s, wps = 60743, train loss = 32.4823
Iteration 20, time = 0.89s, wps = 63346, train loss = 27.2612
Iteration 40, time = 1.61s, wps = 63656, train loss = 10.8757
Iteration 60, time = 1.64s, wps = 62622, train loss = 8.9842
Iteration 80, time = 1.63s, wps = 62906, train loss = 8.8265
Iteration 100, time = 1.63s, wps = 62916, train loss = 8.4189
Iteration 120, time = 1.63s, wps = 62775, train loss = 8.1042
Iteration 140, time = 1.85s, wps = 55387, train loss = 7.4399
Iteration 160, time = 1.64s, wps = 62429, train loss = 7.2140
Iteration 180, time = 1.61s, wps = 63620, train loss = 7.1239
Iteration 200, time = 1.66s, wps = 61845, train loss = 6.8052
Iteration 220, time = 1.64s, wps = 62294, train loss = 6.8148
Iteration 240, time = 1.64s, wps = 62406, train loss = 6.6060
Iteration 260, time = 1.63s, wps = 62685, train loss = 6.6465
Iteration 280, time = 1.63s, wps = 62916, train loss = 6.5393
Iteration 300, time = 1.67s, wps = 61286, train loss = 6.4381
Iteration 320, time = 1.62s, wps = 63153, train loss = 6.3035
Iteration 340, time = 1.64s, wps = 62565, train loss = 6.2822
Iteration 360, time = 1.62s, wps = 63087, train loss = 6.2192
Iteration 380, time = 1.65s, wps = 61924, train loss = 6.1547
Iteration 400, time = 1.63s, wps = 62685, train loss = 5.9958
Iteration 420, time = 1.66s, wps = 61809, train loss = 6.1256
Iteration 440, time = 1.65s, wps = 62199, train loss = 6.0710
Iteration 460, time = 1.63s, wps = 62683, train loss = 6.0110
Iteration 480, time = 1.64s, wps = 62474, train loss = 5.9320
Iteration 500, time = 1.65s, wps = 62076, train loss = 5.9341
Iteration 520, time = 1.62s, wps = 63245, train loss = 6.3383
Iteration 540, time = 1.65s, wps = 62219, train loss = 6.0287
Iteration 560, time = 1.61s, wps = 63431, train loss = 5.9706
Iteration 580, time = 1.63s, wps = 62675, train loss = 5.9255
Iteration 600, time = 1.63s, wps = 62996, train loss = 5.8541
Iteration 620, time = 1.65s, wps = 62161, train loss = 5.8512
Iteration 640, time = 1.65s, wps = 62019, train loss = 5.7942
Iteration 660, time = 1.66s, wps = 61765, train loss = 5.7757
Iteration 680, time = 1.64s, wps = 62617, train loss = 5.7614
Iteration 700, time = 1.63s, wps = 62873, train loss = 5.6767
Iteration 720, time = 1.68s, wps = 61075, train loss = 5.7011
Iteration 740, time = 1.65s, wps = 62110, train loss = 5.7365
Iteration 760, time = 1.67s, wps = 61501, train loss = 5.7003
Iteration 780, time = 1.64s, wps = 62301, train loss = 5.7451
Iteration 800, time = 1.62s, wps = 63089, train loss = 5.7371
Iteration 820, time = 1.64s, wps = 62556, train loss = 5.6732
Iteration 840, time = 1.64s, wps = 62329, train loss = 5.6016
Iteration 860, time = 1.64s, wps = 62626, train loss = 5.6228
Iteration 880, time = 1.64s, wps = 62299, train loss = 5.5566
Iteration 900, time = 1.65s, wps = 62141, train loss = 5.4891
Iteration 920, time = 1.65s, wps = 62155, train loss = 5.5838
Iteration 940, time = 1.64s, wps = 62342, train loss = 5.5549
Iteration 960, time = 1.64s, wps = 62317, train loss = 5.5741
Iteration 980, time = 1.63s, wps = 62905, train loss = 5.5097
Iteration 1000, time = 1.64s, wps = 62403, train loss = 5.5060
Iteration 1020, time = 1.64s, wps = 62526, train loss = 5.5360
Iteration 1040, time = 1.66s, wps = 61802, train loss = 5.5624
Iteration 1060, time = 1.65s, wps = 61909, train loss = 5.5976
Iteration 1080, time = 1.65s, wps = 61917, train loss = 5.4480
Iteration 1100, time = 1.66s, wps = 61866, train loss = 5.3861
Iteration 1120, time = 1.65s, wps = 62204, train loss = 5.4389
Iteration 1140, time = 1.65s, wps = 61941, train loss = 5.4059
Iteration 1160, time = 1.64s, wps = 62312, train loss = 5.3934
Iteration 1180, time = 1.66s, wps = 61537, train loss = 5.3890
Iteration 1200, time = 1.63s, wps = 62666, train loss = 5.4596
Iteration 1220, time = 1.66s, wps = 61566, train loss = 5.3729
Iteration 1240, time = 1.66s, wps = 61671, train loss = 5.3389
Iteration 1260, time = 1.66s, wps = 61858, train loss = 5.3536
Iteration 1280, time = 1.65s, wps = 61884, train loss = 5.3738
Iteration 1300, time = 1.64s, wps = 62295, train loss = 5.3180
Iteration 1320, time = 1.64s, wps = 62498, train loss = 5.3497
Iteration 1340, time = 1.65s, wps = 62101, train loss = 5.2800
Iteration 1360, time = 1.65s, wps = 61979, train loss = 5.2933
Iteration 1380, time = 1.65s, wps = 62058, train loss = 5.3065
Iteration 1400, time = 1.65s, wps = 61925, train loss = 5.3127
Iteration 1420, time = 1.64s, wps = 62591, train loss = 5.2860
Iteration 1440, time = 1.63s, wps = 62798, train loss = 5.2646
Iteration 1460, time = 1.64s, wps = 62462, train loss = 5.3210
Iteration 1480, time = 1.65s, wps = 62062, train loss = 5.1387
Iteration 1500, time = 1.64s, wps = 62448, train loss = 5.2199
Iteration 1520, time = 1.65s, wps = 62014, train loss = 5.2717
Iteration 1540, time = 1.64s, wps = 62511, train loss = 5.2801
Iteration 1560, time = 1.66s, wps = 61591, train loss = 5.2062
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00081-of-00100
Finished processing!
Iteration 1580, time = 3.75s, wps = 27281, train loss = 5.2359
Iteration 1600, time = 1.63s, wps = 62775, train loss = 5.1590
Iteration 1620, time = 1.66s, wps = 61819, train loss = 5.2344
Iteration 1640, time = 1.67s, wps = 61435, train loss = 5.1982
Iteration 1660, time = 1.64s, wps = 62502, train loss = 5.1712

real	3m41.956s
user	9m14.769s
sys	1m43.449s
root@79928109e20f:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output
*****HYPER PARAMETERS*****
{'num_sampled': 8192, 'keep_prob': 0.9, 'emb_size': 512, 'num_delayed_steps': 150, 'average_params': True, 'num_layers': 1, 'max_time': 180, 'num_steps': 20, 'vocab_size': 793470, 'run_profiler': False, 'projected_size': 512, 'do_summaries': False, 'max_grad_norm': 10.0, 'optimizer': 0, 'learning_rate': 0.2, 'num_shards': 8, 'state_size': 2048, 'batch_size': 128, 'num_gpus': 2}
**************************
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
Current time: 1553859692.4551651
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-03-29 11:41:33.254943: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1405] Found device 0 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:05:00.0
totalMemory: 10.73GiB freeMemory: 10.26GiB
2019-03-29 11:41:33.350832: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1405] Found device 1 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:09:00.0
totalMemory: 10.73GiB freeMemory: 10.57GiB
2019-03-29 11:41:33.350883: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1484] Adding visible gpu devices: 0, 1
2019-03-29 11:41:33.949308: I tensorflow/core/common_runtime/gpu/gpu_device.cc:965] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-03-29 11:41:33.949348: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971]      0 1
2019-03-29 11:41:33.949355: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] 0:   N Y
2019-03-29 11:41:33.949360: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] 1:   Y N
2019-03-29 11:41:33.950329: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1097] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 9902 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0, compute capability: 7.5)
2019-03-29 11:41:34.053380: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1097] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10207 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:09:00.0, compute capability: 7.5)
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00071-of-00100
Finished processing!
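The device probe repeats on every invocation because each run builds a fresh session. To confirm what TensorFlow sees without launching a full training run, a minimal sketch against the TF 1.x API shipped in this container:

    # Minimal sketch (TF 1.x): list the devices TensorFlow registers.
    # Should mirror the GPU:0 / GPU:1 lines in the log above.
    from tensorflow.python.client import device_lib

    for dev in device_lib.list_local_devices():
        print(dev.name, dev.physical_device_desc)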
Iteration 1672, time = 10.15s, wps = 504, train loss = 6.1268
Iteration 1673, time = 5.71s, wps = 897, train loss = 5.5942
Iteration 1674, time = 0.08s, wps = 63069, train loss = 5.2970
Iteration 1675, time = 0.07s, wps = 68584, train loss = 5.2362
Iteration 1676, time = 0.08s, wps = 64263, train loss = 5.1748
Iteration 1677, time = 0.08s, wps = 60584, train loss = 5.2073
Iteration 1678, time = 0.09s, wps = 58579, train loss = 5.2957
Iteration 1679, time = 0.08s, wps = 63489, train loss = 5.1740
Iteration 1680, time = 0.08s, wps = 61717, train loss = 5.1881
Iteration 1691, time = 0.88s, wps = 64102, train loss = 5.1214
Iteration 1711, time = 1.63s, wps = 62913, train loss = 5.2021
Iteration 1731, time = 1.68s, wps = 61108, train loss = 5.1545
Iteration 1751, time = 1.61s, wps = 63424, train loss = 5.1971
Iteration 1771, time = 1.63s, wps = 63015, train loss = 5.0851
Iteration 1791, time = 1.63s, wps = 62715, train loss = 5.1462
Iteration 1811, time = 1.63s, wps = 62767, train loss = 5.1037
Iteration 1831, time = 1.64s, wps = 62617, train loss = 5.0729
Iteration 1851, time = 1.64s, wps = 62629, train loss = 5.1038
Iteration 1871, time = 1.66s, wps = 61820, train loss = 5.0992
Iteration 1891, time = 1.66s, wps = 61760, train loss = 5.1336
Iteration 1911, time = 1.85s, wps = 55481, train loss = 5.0382
Iteration 1931, time = 1.64s, wps = 62456, train loss = 5.1259
Iteration 1951, time = 1.65s, wps = 62171, train loss = 5.1764
Iteration 1971, time = 1.64s, wps = 62481, train loss = 5.0379
Iteration 1991, time = 1.66s, wps = 61624, train loss = 5.0601
Iteration 2011, time = 1.60s, wps = 64029, train loss = 5.0818
Iteration 2031, time = 1.63s, wps = 62841, train loss = 5.0181
Iteration 2051, time = 1.66s, wps = 61804, train loss = 5.0393
Iteration 2071, time = 1.62s, wps = 63378, train loss = 5.0391
Iteration 2091, time = 1.66s, wps = 61502, train loss = 4.9893
Iteration 2111, time = 1.64s, wps = 62353, train loss = 4.9395
Iteration 2131, time = 1.66s, wps = 61844, train loss = 5.0925
Iteration 2151, time = 1.64s, wps = 62578, train loss = 5.0784
Iteration 2171, time = 1.64s, wps = 62428, train loss = 5.0641
Iteration 2191, time = 1.67s, wps = 61468, train loss = 5.0276
Iteration 2211, time = 1.65s, wps = 62143, train loss = 4.9608
Iteration 2231, time = 1.64s, wps = 62423, train loss = 4.9611
Iteration 2251, time = 1.65s, wps = 62138, train loss = 4.9712
Iteration 2271, time = 1.65s, wps = 62066, train loss = 4.9794
Iteration 2291, time = 1.65s, wps = 62211, train loss = 5.0230
Iteration 2311, time = 1.64s, wps = 62548, train loss = 5.0023
Iteration 2331, time = 1.67s, wps = 61170, train loss = 4.8706
Iteration 2351, time = 1.67s, wps = 61265, train loss = 4.9509
Iteration 2371, time = 1.63s, wps = 62673, train loss = 4.9685
Iteration 2391, time = 1.66s, wps = 61640, train loss = 4.9203
Iteration 2411, time = 1.64s, wps = 62573, train loss = 4.9808
Iteration 2431, time = 1.64s, wps = 62347, train loss = 4.9179
Iteration 2451, time = 1.67s, wps = 61442, train loss = 4.8250
Iteration 2471, time = 1.65s, wps = 62163, train loss = 4.9033
Iteration 2491, time = 1.66s, wps = 61688, train loss = 4.9063
Iteration 2511, time = 1.67s, wps = 61408, train loss = 4.9172
Iteration 2531, time = 1.63s, wps = 62931, train loss = 4.9032
Iteration 2551, time = 1.64s, wps = 62611, train loss = 4.9281
Iteration 2571, time = 1.65s, wps = 62026, train loss = 4.8681
Iteration 2591, time = 1.64s, wps = 62385, train loss = 4.9105
Iteration 2611, time = 1.63s, wps = 62735, train loss = 4.8756
Iteration 2631, time = 1.63s, wps = 62901, train loss = 4.8849
Iteration 2651, time = 1.62s, wps = 63050, train loss = 4.8975
Iteration 2671, time = 1.66s, wps = 61736, train loss = 4.8857
Iteration 2691, time = 1.62s, wps = 63120, train loss = 4.9238
Iteration 2711, time = 1.65s, wps = 61979, train loss = 4.8576
Iteration 2731, time = 1.65s, wps = 62100, train loss = 4.8590
Iteration 2751, time = 1.65s, wps = 62038, train loss = 4.9992
Iteration 2771, time = 1.66s, wps = 61731, train loss = 4.8892
Iteration 2791, time = 1.68s, wps = 61110, train loss = 4.8787
Iteration 2811, time = 1.65s, wps = 62174, train loss = 4.8241
Iteration 2831, time = 1.66s, wps = 61737, train loss = 4.8065
Iteration 2851, time = 1.66s, wps = 61772, train loss = 4.8811
Iteration 2871, time = 1.63s, wps = 62798, train loss = 4.8423
Iteration 2891, time = 1.65s, wps = 61964, train loss = 4.8276
Iteration 2911, time = 1.66s, wps = 61525, train loss = 4.8149
Iteration 2931, time = 1.68s, wps = 60782, train loss = 4.7974
Iteration 2951, time = 1.67s, wps = 61304, train loss = 4.7914
Iteration 2971, time = 1.62s, wps = 63248, train loss = 4.8194
Iteration 2991, time = 1.65s, wps = 62182, train loss = 4.8776
Iteration 3011, time = 1.65s, wps = 61932, train loss = 4.7531
Iteration 3031, time = 1.66s, wps = 61863, train loss = 4.8425
Iteration 3051, time = 1.66s, wps = 61654, train loss = 4.8052
Iteration 3071, time = 1.65s, wps = 62182, train loss = 4.8027
Iteration 3091, time = 1.66s, wps = 61619, train loss = 4.8071
Iteration 3111, time = 1.67s, wps = 61424, train loss = 4.8665
Iteration 3131, time = 1.63s, wps = 62841, train loss = 4.7791
Iteration 3151, time = 1.66s, wps = 61775, train loss = 4.7631
Iteration 3171, time = 1.65s, wps = 62132, train loss = 4.7743
Iteration 3191, time = 1.68s, wps = 60990, train loss = 4.7227
Iteration 3211, time = 1.64s, wps = 62282, train loss = 4.6999
Iteration 3231, time = 1.65s, wps = 61921, train loss = 4.7198
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00092-of-00100
Finished processing!
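Steady-state throughput sits around 62,000 wps on the two GPUs, and that figure is consistent with the hyperparameters: each logged line covers 20 iterations, and each iteration consumes batch_size × num_steps words per GPU. A back-of-the-envelope check (assuming, as the numbers suggest, that batch_size is per GPU):

    # Rough check of the logged wps figures; assumes batch_size is per GPU.
    batch_size, num_steps, num_gpus = 128, 20, 2
    words_per_iteration = batch_size * num_steps * num_gpus  # 5120
    iterations, seconds = 20, 1.63  # e.g. "Iteration 1711, time = 1.63s"
    print(int(words_per_iteration * iterations / seconds))   # 62822, close to the logged 62913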
Iteration 3251, time = 3.76s, wps = 27219, train loss = 4.7018
Iteration 3271, time = 1.64s, wps = 62580, train loss = 4.8179
Iteration 3291, time = 1.66s, wps = 61819, train loss = 4.7786
Iteration 3311, time = 1.67s, wps = 61286, train loss = 4.8069
Iteration 3331, time = 1.64s, wps = 62376, train loss = 4.7424
Iteration 3351, time = 1.63s, wps = 62811, train loss = 4.8013
Iteration 3371, time = 1.67s, wps = 61295, train loss = 4.8420
Iteration 3391, time = 1.65s, wps = 61969, train loss = 4.7724
Iteration 3411, time = 1.66s, wps = 61730, train loss = 4.7340
Iteration 3431, time = 1.64s, wps = 62366, train loss = 4.6910
Iteration 3451, time = 1.65s, wps = 62107, train loss = 4.7646
Iteration 3471, time = 1.67s, wps = 61320, train loss = 4.7867
Iteration 3491, time = 1.66s, wps = 61682, train loss = 4.7802
Iteration 3511, time = 1.67s, wps = 61236, train loss = 4.6817

real	3m37.397s
user	9m47.417s
sys	1m45.353s
root@79928109e20f:/workspace/nvidia-examples/big_lstm# time python single_lm_train.py --mode=train --logdir=./logs --num_gpus=2 --datadir=./data/1-billion-word-language-modeling-benchmark-r13output
*****HYPER PARAMETERS*****
{'num_layers': 1, 'num_sampled': 8192, 'learning_rate': 0.2, 'num_delayed_steps': 150, 'num_shards': 8, 'batch_size': 128, 'emb_size': 512, 'run_profiler': False, 'do_summaries': False, 'max_time': 180, 'vocab_size': 793470, 'state_size': 2048, 'max_grad_norm': 10.0, 'average_params': True, 'projected_size': 512, 'keep_prob': 0.9, 'num_gpus': 2, 'optimizer': 0, 'num_steps': 20}
**************************
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/model_utils.py:33: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
Current time: 1553860262.053867
ALL VARIABLES
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:18: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
model/global_step:0 ()
model/model/emb_0/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_1/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_2/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_3/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_4/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_5/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_6/Adagrad:0 (99184, 512) /gpu:0
model/model/emb_7/Adagrad:0 (99184, 512) /gpu:0
model/model/lstm_0/LSTMCell/W_0/Adagrad:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/Adagrad:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/Adagrad:0 (2048, 512) /gpu:0
model/model/softmax_w_0/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_1/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_2/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_3/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_4/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_5/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_6/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_w_7/Adagrad:0 (99184, 512) /gpu:0
model/model/softmax_b/Adagrad:0 (793470,) /gpu:0
model/model/lstm_0/LSTMCell/W_0/ExponentialMovingAverage:0 (1024, 8192) /gpu:0
model/model/lstm_0/LSTMCell/B/ExponentialMovingAverage:0 (8192,) /gpu:0
model/model/lstm_0/LSTMCell/W_P_0/ExponentialMovingAverage:0 (2048, 512) /gpu:0
TRAINABLE VARIABLES
model/emb_0:0 (99184, 512) /gpu:0
model/emb_1:0 (99184, 512) /gpu:0
model/emb_2:0 (99184, 512) /gpu:0
model/emb_3:0 (99184, 512) /gpu:0
model/emb_4:0 (99184, 512) /gpu:0
model/emb_5:0 (99184, 512) /gpu:0
model/emb_6:0 (99184, 512) /gpu:0
model/emb_7:0 (99184, 512) /gpu:0
model/lstm_0/LSTMCell/W_0:0 (1024, 8192) /gpu:0
model/lstm_0/LSTMCell/B:0 (8192,) /gpu:0
model/lstm_0/LSTMCell/W_P_0:0 (2048, 512) /gpu:0
model/softmax_w_0:0 (99184, 512) /gpu:0
model/softmax_w_1:0 (99184, 512) /gpu:0
model/softmax_w_2:0 (99184, 512) /gpu:0
model/softmax_w_3:0 (99184, 512) /gpu:0
model/softmax_w_4:0 (99184, 512) /gpu:0
model/softmax_w_5:0 (99184, 512) /gpu:0
model/softmax_w_6:0 (99184, 512) /gpu:0
model/softmax_w_7:0 (99184, 512) /gpu:0
model/softmax_b:0 (793470,) /gpu:0
LOCAL VARIABLES
model/model/state_0_0:0 (128, 2560) /gpu:0
model/model_1/state_1_0:0 (128, 2560) /gpu:1
WARNING:tensorflow:From /opt/tensorflow/nvidia-examples/big_lstm/run_utils.py:32: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-03-29 11:51:02.870447: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1405] Found device 0 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:05:00.0
totalMemory: 10.73GiB freeMemory: 10.26GiB
2019-03-29 11:51:02.964070: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1405] Found device 1 with properties:
name: GeForce RTX 2080 Ti major: 7 minor: 5 memoryClockRate(GHz): 1.635
pciBusID: 0000:09:00.0
totalMemory: 10.73GiB freeMemory: 10.57GiB
2019-03-29 11:51:02.964115: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1484] Adding visible gpu devices: 0, 1
2019-03-29 11:51:03.545136: I tensorflow/core/common_runtime/gpu/gpu_device.cc:965] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-03-29 11:51:03.545178: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971]      0 1
2019-03-29 11:51:03.545185: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] 0:   N Y
2019-03-29 11:51:03.545190: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] 1:   Y N
2019-03-29 11:51:03.546163: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1097] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 9902 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:05:00.0, compute capability: 7.5)
2019-03-29 11:51:03.648714: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1097] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10207 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:09:00.0, compute capability: 7.5)
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00053-of-00100
Finished processing!
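Note that this third run resumes at iteration 3521 rather than 1: all three invocations share --logdir=./logs, so the (deprecated) Supervisor restores the latest checkpoint and the global step carries over. Each run also ends on its own after roughly 3.5 minutes of wall clock, consistent with max_time: 180 (about 180 s of training plus startup and teardown). A minimal sketch to see which checkpoint a fresh run would restore, assuming the checkpoints sit directly under ./logs:

    # Minimal sketch (TF 1.x): report the newest checkpoint under --logdir.
    import tensorflow as tf

    print(tf.train.latest_checkpoint('./logs'))  # e.g. ./logs/model.ckpt-<global_step>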
Iteration 3521, time = 9.98s, wps = 513, train loss = 5.0070
Iteration 3522, time = 5.74s, wps = 892, train loss = 4.7614
Iteration 3523, time = 0.09s, wps = 56429, train loss = 4.7760
Iteration 3524, time = 0.09s, wps = 58940, train loss = 4.7155
Iteration 3525, time = 0.08s, wps = 61351, train loss = 4.6857
Iteration 3526, time = 0.08s, wps = 63455, train loss = 4.7255
Iteration 3527, time = 0.08s, wps = 64001, train loss = 4.7320
Iteration 3528, time = 0.08s, wps = 65530, train loss = 4.7115
Iteration 3529, time = 0.08s, wps = 63892, train loss = 4.6916
Iteration 3540, time = 0.87s, wps = 64500, train loss = 4.7226
Iteration 3560, time = 1.61s, wps = 63552, train loss = 4.7048
Iteration 3580, time = 1.61s, wps = 63675, train loss = 4.7643
Iteration 3600, time = 1.60s, wps = 63860, train loss = 4.6392
Iteration 3620, time = 1.60s, wps = 64179, train loss = 4.6796
Iteration 3640, time = 1.60s, wps = 64075, train loss = 4.6797
Iteration 3660, time = 1.65s, wps = 62239, train loss = 4.6629
Iteration 3680, time = 1.63s, wps = 62686, train loss = 4.7091
Iteration 3700, time = 1.63s, wps = 62890, train loss = 4.6847
Iteration 3720, time = 1.61s, wps = 63562, train loss = 4.6775
Iteration 3740, time = 1.61s, wps = 63650, train loss = 4.6216
Iteration 3760, time = 1.85s, wps = 55396, train loss = 4.7487
Iteration 3780, time = 1.64s, wps = 62540, train loss = 4.7086
Iteration 3800, time = 1.65s, wps = 62049, train loss = 4.7024
Iteration 3820, time = 1.66s, wps = 61796, train loss = 4.6615
Iteration 3840, time = 1.66s, wps = 61798, train loss = 4.6611
Iteration 3860, time = 1.64s, wps = 62405, train loss = 4.6420
Iteration 3880, time = 1.63s, wps = 62699, train loss = 4.6397
Iteration 3900, time = 1.68s, wps = 61045, train loss = 4.7358
Iteration 3920, time = 1.64s, wps = 62348, train loss = 4.7505
Iteration 3940, time = 1.65s, wps = 62159, train loss = 4.6529
Iteration 3960, time = 1.65s, wps = 62169, train loss = 4.6404
Iteration 3980, time = 1.67s, wps = 61421, train loss = 4.7213
Iteration 4000, time = 1.65s, wps = 62244, train loss = 4.6847
Iteration 4020, time = 1.64s, wps = 62622, train loss = 4.7498
Iteration 4040, time = 1.65s, wps = 62122, train loss = 4.6484
Iteration 4060, time = 1.68s, wps = 61130, train loss = 4.6062
Iteration 4080, time = 1.66s, wps = 61858, train loss = 4.6677
Iteration 4100, time = 1.63s, wps = 62930, train loss = 4.5484
Iteration 4120, time = 1.65s, wps = 62029, train loss = 4.6711
Iteration 4140, time = 1.65s, wps = 62213, train loss = 4.5211
Iteration 4160, time = 1.63s, wps = 62893, train loss = 4.7100
Iteration 4180, time = 1.65s, wps = 62128, train loss = 4.5801
Iteration 4200, time = 1.66s, wps = 61703, train loss = 4.7455
Iteration 4220, time = 1.62s, wps = 63304, train loss = 4.6638
Iteration 4240, time = 1.64s, wps = 62327, train loss = 4.5821
Iteration 4260, time = 1.65s, wps = 61928, train loss = 4.5761
Iteration 4280, time = 1.68s, wps = 60928, train loss = 4.6472
Iteration 4300, time = 1.62s, wps = 63239, train loss = 4.5846
Iteration 4320, time = 1.66s, wps = 61652, train loss = 4.6611
Iteration 4340, time = 1.63s, wps = 62675, train loss = 4.5531
Iteration 4360, time = 1.64s, wps = 62588, train loss = 4.5999
Iteration 4380, time = 1.64s, wps = 62494, train loss = 4.6468
Iteration 4400, time = 1.66s, wps = 61808, train loss = 4.5313
Iteration 4420, time = 1.67s, wps = 61259, train loss = 4.6791
Iteration 4440, time = 1.63s, wps = 62729, train loss = 4.6357
Iteration 4460, time = 1.64s, wps = 62605, train loss = 4.5895
Iteration 4480, time = 1.65s, wps = 62171, train loss = 4.6539
Iteration 4500, time = 1.62s, wps = 63026, train loss = 4.6012
Iteration 4520, time = 1.66s, wps = 61632, train loss = 4.5220
Iteration 4540, time = 1.64s, wps = 62544, train loss = 4.6001
Iteration 4560, time = 1.63s, wps = 62939, train loss = 4.5198
Iteration 4580, time = 1.66s, wps = 61570, train loss = 4.5689
Iteration 4600, time = 1.63s, wps = 62772, train loss = 4.5730
Iteration 4620, time = 1.66s, wps = 61795, train loss = 4.5432
Iteration 4640, time = 1.67s, wps = 61435, train loss = 4.5996
Iteration 4660, time = 1.69s, wps = 60762, train loss = 4.5975
Iteration 4680, time = 1.65s, wps = 62203, train loss = 4.5983
Iteration 4700, time = 1.67s, wps = 61273, train loss = 4.5403
Iteration 4720, time = 1.63s, wps = 62750, train loss = 4.5134
Iteration 4740, time = 1.66s, wps = 61665, train loss = 4.5639
Iteration 4760, time = 1.67s, wps = 61499, train loss = 4.5754
Iteration 4780, time = 1.64s, wps = 62330, train loss = 4.5249
Iteration 4800, time = 1.66s, wps = 61709, train loss = 4.5773
Iteration 4820, time = 1.64s, wps = 62384, train loss = 4.5644
Iteration 4840, time = 1.63s, wps = 62834, train loss = 4.5167
Iteration 4860, time = 1.69s, wps = 60467, train loss = 4.5110
Iteration 4880, time = 1.65s, wps = 62121, train loss = 4.5163
Iteration 4900, time = 1.65s, wps = 62046, train loss = 4.5504
Iteration 4920, time = 1.64s, wps = 62384, train loss = 4.5429
Iteration 4940, time = 1.63s, wps = 62804, train loss = 4.6321
Iteration 4960, time = 1.64s, wps = 62475, train loss = 4.5005
Iteration 4980, time = 1.66s, wps = 61691, train loss = 4.5553
Iteration 5000, time = 1.64s, wps = 62489, train loss = 4.6346
Iteration 5020, time = 1.66s, wps = 61561, train loss = 4.5395
Iteration 5040, time = 1.68s, wps = 61107, train loss = 4.6029
Iteration 5060, time = 1.63s, wps = 62767, train loss = 4.4318
Iteration 5080, time = 1.64s, wps = 62453, train loss = 4.5797
Processing file: ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00073-of-00100
Finished processing!
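Each log line above has the form "Iteration N, time = T, wps = W, train loss = L"; after the slow warm-up steps the run settles at roughly 61,000-65,000 words per second, and "Processing file ... Finished processing!" marks the input pipeline moving on to the next training shard. To summarize a whole run, such lines can be parsed with a small script. A minimal sketch, assuming the console output was captured to a file (the name big_lstm.log is hypothetical, not produced by the example itself):

    import re
    import statistics

    # Matches lines like: "Iteration 4480, time = 1.65s, wps = 62171, train loss = 4.6539"
    PATTERN = re.compile(
        r"Iteration (\d+), time = ([\d.]+)s, wps = (\d+), train loss = ([\d.]+)"
    )

    def summarize(log_path):
        wps, losses = [], []
        with open(log_path) as f:
            for line in f:
                m = PATTERN.search(line)
                if m:
                    wps.append(int(m.group(3)))
                    losses.append(float(m.group(4)))
        if not wps:
            print("no iteration lines found")
            return
        # Skip the first few entries: they cover graph construction and
        # warm-up (513 and 892 wps above), not steady-state throughput.
        steady = wps[5:] or wps
        print("iterations parsed:", len(wps))
        print("median steady wps:", statistics.median(steady))
        print("last train loss:  ", losses[-1])

    summarize("big_lstm.log")  # hypothetical capture of the console output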
Iteration 5100, time = 3.80s, wps = 26968, train loss = 4.4609
Iteration 5120, time = 1.65s, wps = 62106, train loss = 4.5201
Iteration 5140, time = 1.65s, wps = 62165, train loss = 4.5071
Iteration 5160, time = 1.65s, wps = 62249, train loss = 4.4756
Iteration 5180, time = 1.65s, wps = 62221, train loss = 4.5557
Iteration 5200, time = 1.65s, wps = 62022, train loss = 4.5165
Iteration 5220, time = 1.64s, wps = 62317, train loss = 4.5632
Iteration 5240, time = 1.67s, wps = 61312, train loss = 4.4515
Iteration 5260, time = 1.66s, wps = 61549, train loss = 4.5081
Iteration 5280, time = 1.63s, wps = 62954, train loss = 4.6012
Iteration 5300, time = 1.66s, wps = 61589, train loss = 4.6120
Iteration 5320, time = 1.64s, wps = 62287, train loss = 4.6370
Iteration 5340, time = 1.64s, wps = 62579, train loss = 4.5261
Iteration 5360, time = 1.66s, wps = 61693, train loss = 4.5339

real    3m37.283s
user    9m46.130s
sys     1m45.911s
root@79928109e20f:/workspace/nvidia-examples/big_lstm# cat /etc/os-release
NAME="Ubuntu"
VERSION="16.04.5 LTS (Xenial Xerus)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 16.04.5 LTS"
VERSION_ID="16.04"
HOME_URL="http://www.ubuntu.com/"
SUPPORT_URL="http://help.ubuntu.com/"
BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/"
VERSION_CODENAME=xenial
UBUNTU_CODENAME=xenial
root@79928109e20f:/workspace/nvidia-examples/big_lstm# nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2018 NVIDIA Corporation
Built on Sat_Aug_25_21:08:01_CDT_2018
Cuda compilation tools, release 10.0, V10.0.130
root@79928109e20f:/workspace/nvidia-examples/big_lstm# cd data
root@79928109e20f:/workspace/nvidia-examples/big_lstm/data# ls
1-billion-word-language-modeling-benchmark-r13output
root@79928109e20f:/workspace/nvidia-examples/big_lstm/data# cd 1-billion-word-language-modeling-benchmark-r13output
root@79928109e20f:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# ls
1b_word_vocab.txt  heldout-monolingual.tokenized.shuffled
README             training-monolingual.tokenized.shuffled
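The dataset root holds the vocabulary file, the heldout shards, and the training shards consumed by the run above. Before a long training run it can be worth confirming that every shard downloaded intact; a minimal sketch (not part of the example code), assuming it is run from the big_lstm directory so the relative ./data path resolves:

    import glob
    import os

    root = "./data/1-billion-word-language-modeling-benchmark-r13output"
    train_dir = os.path.join(root, "training-monolingual.tokenized.shuffled")

    # Shards follow the pattern news.en-000NN-of-00100; the ls output
    # below shows news.en-00001-of-00100 through news.en-00099-of-00100.
    shards = sorted(glob.glob(os.path.join(train_dir, "news.en-*-of-00100")))
    print("training shards found:", len(shards))
    if shards:
        print("first:", os.path.basename(shards[0]))
        print("last: ", os.path.basename(shards[-1]))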
root@79928109e20f:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output# cd training-monolingual.tokenized.shuffled
root@79928109e20f:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# ls
news.en-00001-of-00100  news.en-00034-of-00100  news.en-00067-of-00100
news.en-00002-of-00100  news.en-00035-of-00100  news.en-00068-of-00100
news.en-00003-of-00100  news.en-00036-of-00100  news.en-00069-of-00100
news.en-00004-of-00100  news.en-00037-of-00100  news.en-00070-of-00100
news.en-00005-of-00100  news.en-00038-of-00100  news.en-00071-of-00100
news.en-00006-of-00100  news.en-00039-of-00100  news.en-00072-of-00100
news.en-00007-of-00100  news.en-00040-of-00100  news.en-00073-of-00100
news.en-00008-of-00100  news.en-00041-of-00100  news.en-00074-of-00100
news.en-00009-of-00100  news.en-00042-of-00100  news.en-00075-of-00100
news.en-00010-of-00100  news.en-00043-of-00100  news.en-00076-of-00100
news.en-00011-of-00100  news.en-00044-of-00100  news.en-00077-of-00100
news.en-00012-of-00100  news.en-00045-of-00100  news.en-00078-of-00100
news.en-00013-of-00100  news.en-00046-of-00100  news.en-00079-of-00100
news.en-00014-of-00100  news.en-00047-of-00100  news.en-00080-of-00100
news.en-00015-of-00100  news.en-00048-of-00100  news.en-00081-of-00100
news.en-00016-of-00100  news.en-00049-of-00100  news.en-00082-of-00100
news.en-00017-of-00100  news.en-00050-of-00100  news.en-00083-of-00100
news.en-00018-of-00100  news.en-00051-of-00100  news.en-00084-of-00100
news.en-00019-of-00100  news.en-00052-of-00100  news.en-00085-of-00100
news.en-00020-of-00100  news.en-00053-of-00100  news.en-00086-of-00100
news.en-00021-of-00100  news.en-00054-of-00100  news.en-00087-of-00100
news.en-00022-of-00100  news.en-00055-of-00100  news.en-00088-of-00100
news.en-00023-of-00100  news.en-00056-of-00100  news.en-00089-of-00100
news.en-00024-of-00100  news.en-00057-of-00100  news.en-00090-of-00100
news.en-00025-of-00100  news.en-00058-of-00100  news.en-00091-of-00100
news.en-00026-of-00100  news.en-00059-of-00100  news.en-00092-of-00100
news.en-00027-of-00100  news.en-00060-of-00100  news.en-00093-of-00100
news.en-00028-of-00100  news.en-00061-of-00100  news.en-00094-of-00100
news.en-00029-of-00100  news.en-00062-of-00100  news.en-00095-of-00100
news.en-00030-of-00100  news.en-00063-of-00100  news.en-00096-of-00100
news.en-00031-of-00100  news.en-00064-of-00100  news.en-00097-of-00100
news.en-00032-of-00100  news.en-00065-of-00100  news.en-00098-of-00100
news.en-00033-of-00100  news.en-00066-of-00100  news.en-00099-of-00100
root@79928109e20f:/workspace/nvidia-examples/big_lstm/data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled# exit
exit
chibi@1604:~$ cat /etc/os-release
NAME="Ubuntu"
VERSION="16.04.6 LTS (Xenial Xerus)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 16.04.6 LTS"
VERSION_ID="16.04"
HOME_URL="http://www.ubuntu.com/"
SUPPORT_URL="http://help.ubuntu.com/"
BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/"
VERSION_CODENAME=xenial
UBUNTU_CODENAME=xenial
chibi@1604:~$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Fri_Feb__8_19:08:17_PST_2019
Cuda compilation tools, release 10.1, V10.1.105
chibi@1604:~$ sudo hddtemp /dev/sda
[sudo] password for chibi:
/dev/sda: TS128GSSD370S: 23°C
chibi@1604:~$ nvidia-smi nvlink -c
GPU 0: GeForce RTX 2080 Ti (UUID: GPU-1ac935c2-557f-282e-14e5-3f749ffd63ac)
         Link 0, P2P is supported: true
         Link 0, Access to system memory supported: true
         Link 0, P2P atomics supported: true
         Link 0, System memory atomics supported: true
         Link 0, SLI is supported: true
         Link 0, Link is supported: false
         Link 1, P2P is supported: true
         Link 1, Access to system memory supported: true
         Link 1, P2P atomics supported: true
         Link 1, System memory atomics supported: true
         Link 1, SLI is supported: true
         Link 1, Link is supported: false
GPU 1: GeForce RTX 2080 Ti (UUID: GPU-13277ce5-e1e9-0cb1-8cee-6c9e6618e774)
         Link 0, P2P is supported: true
         Link 0, Access to system memory supported: true
         Link 0, P2P atomics supported: true
         Link 0, System memory atomics supported: true
         Link 0, SLI is supported: true
         Link 0, Link is supported: false
         Link 1, P2P is supported: true
         Link 1, Access to system memory supported: true
         Link 1, P2P atomics supported: true
         Link 1, System memory atomics supported: true
         Link 1, SLI is supported: true
         Link 1, Link is supported: false
chibi@1604:~$
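Both RTX 2080 Ti boards report per-link P2P capability, but "Link is supported: false" on every link appears to indicate that no NVLink bridge is active here, so the multi-GPU run above presumably communicated over PCIe. Independently of the link topology, it is easy to confirm that TensorFlow inside the container actually sees both boards. A minimal sketch using the TF 1.x device listing API (device_lib) that ships in this image:

    from tensorflow.python.client import device_lib

    # Print every GPU TensorFlow can see; on this machine there should
    # be two GeForce RTX 2080 Ti entries.
    for dev in device_lib.list_local_devices():
        if dev.device_type == "GPU":
            print(dev.name, "-", dev.physical_device_desc)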