cd /go-elf/ELF/scripts
source devmode_set_pythonpath.sh
cd /go-elf/ELF/scripts/elfgames/go/
./gtp.sh ../../../pretrained-go-19x19-v2.bin --loglevel off --gpu 0 --num_block 20 --dim 256 --mcts_puct 1.50 --batchsize 256 --mcts_rollout_per_batch 16 --mcts_threads 2 --mcts_rollout_per_thread 8192 --resign_thres 0.05 --mcts_virtual_loss 1
解决代码问题
v.pin_memory() 处的 CUDA out of memory 问题
加了 print (v) 语句。go-elf/ELF/src_py/elf/utils_elf.py 第 44 行:
    print (v)
    if gpu is not None:
        with torch.cuda.device(gpu):
            v = v.pin_memory()
    v.fill_(1)
似乎是偶发故障,后续没有再复现。
> /go-elf/ELF/src_py/elf/utils_elf.py(192)copy_from()
-> for k, v in this_src.items():
(Pdb) y
*** NameError: name 'y' is not defined
(Pdb) q
Traceback (most recent call last):
  File "df_console.py", line 86, in <module>
    main()
  File "df_console.py", line 79, in main
    GC.run()
  File "/home/majiang/hd/opensource/ELF_GO/builded_env/go-elf/ELF/src_py/elf/utils_elf.py", line 436, in run
    self._call(smem, *args, **kwargs)
  File "/home/majiang/hd/opensource/ELF_GO/builded_env/go-elf/ELF/src_py/elf/utils_elf.py", line 404, in _call
    keys_extra, keys_missing = sel_reply.copy_from(reply)
  File "/home/majiang/hd/opensource/ELF_GO/builded_env/go-elf/ELF/src_py/elf/utils_elf.py", line 192, in copy_from
    for k, v in this_src.items():
  File "/home/majiang/hd/opensource/ELF_GO/builded_env/go-elf/ELF/src_py/elf/utils_elf.py", line 192, in copy_from
    for k, v in this_src.items():
  File "/home/majiang/hd/opensource/ELF_GO/builded_env/miniconda3/lib/python3.7/bdb.py", line 88, in trace_dispatch
    return self.dispatch_line(frame)
  File "/home/majiang/hd/opensource/ELF_GO/builded_env/miniconda3/lib/python3.7/bdb.py", line 113, in dispatch_line
    if self.quitting: raise BdbQuit