-
Notifications
You must be signed in to change notification settings - Fork 76
/
nextvit_small_in1k_384.log
4080 lines (4080 loc) · 466 KB
/
nextvit_small_in1k_384.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Epoch: [0] [ 0/1251] eta: 8:37:47 lr: 0.000010 loss: 3.4745 (3.4745) time: 24.8341 data: 5.5879 max mem: 29510
Epoch: [0] [ 10/1251] eta: 0:56:37 lr: 0.000010 loss: 3.2861 (3.1131) time: 2.7375 data: 0.5082 max mem: 29510
Epoch: [0] [ 20/1251] eta: 0:34:32 lr: 0.000010 loss: 3.2273 (3.1118) time: 0.5262 data: 0.0002 max mem: 29510
Epoch: [0] [ 30/1251] eta: 0:26:35 lr: 0.000010 loss: 3.1473 (3.1004) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [0] [ 40/1251] eta: 0:22:30 lr: 0.000010 loss: 3.0176 (3.0299) time: 0.5183 data: 0.0003 max mem: 29510
Epoch: [0] [ 50/1251] eta: 0:20:00 lr: 0.000010 loss: 2.9639 (3.0202) time: 0.5240 data: 0.0002 max mem: 29510
Epoch: [0] [ 60/1251] eta: 0:18:16 lr: 0.000010 loss: 2.9479 (2.9966) time: 0.5212 data: 0.0002 max mem: 29510
Epoch: [0] [ 70/1251] eta: 0:17:01 lr: 0.000010 loss: 3.0694 (2.9862) time: 0.5206 data: 0.0002 max mem: 29510
Epoch: [0] [ 80/1251] eta: 0:16:03 lr: 0.000010 loss: 3.0693 (2.9615) time: 0.5241 data: 0.0002 max mem: 29510
Epoch: [0] [ 90/1251] eta: 0:15:17 lr: 0.000010 loss: 3.0312 (2.9719) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [0] [ 100/1251] eta: 0:14:38 lr: 0.000010 loss: 3.0287 (2.9555) time: 0.5221 data: 0.0002 max mem: 29510
Epoch: [0] [ 110/1251] eta: 0:14:05 lr: 0.000010 loss: 3.0287 (2.9558) time: 0.5187 data: 0.0002 max mem: 29510
Epoch: [0] [ 120/1251] eta: 0:13:38 lr: 0.000010 loss: 3.0001 (2.9427) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [0] [ 130/1251] eta: 0:13:13 lr: 0.000010 loss: 2.9132 (2.9296) time: 0.5233 data: 0.0003 max mem: 29510
Epoch: [0] [ 140/1251] eta: 0:12:51 lr: 0.000010 loss: 2.9244 (2.9313) time: 0.5169 data: 0.0003 max mem: 29510
Epoch: [0] [ 150/1251] eta: 0:12:31 lr: 0.000010 loss: 3.0356 (2.9237) time: 0.5196 data: 0.0003 max mem: 29510
Epoch: [0] [ 160/1251] eta: 0:12:14 lr: 0.000010 loss: 2.8395 (2.9201) time: 0.5227 data: 0.0002 max mem: 29510
Epoch: [0] [ 170/1251] eta: 0:11:57 lr: 0.000010 loss: 2.7756 (2.9036) time: 0.5191 data: 0.0002 max mem: 29510
Epoch: [0] [ 180/1251] eta: 0:11:42 lr: 0.000010 loss: 2.7298 (2.8932) time: 0.5172 data: 0.0002 max mem: 29510
Epoch: [0] [ 190/1251] eta: 0:11:28 lr: 0.000010 loss: 2.6888 (2.8765) time: 0.5214 data: 0.0002 max mem: 29510
Epoch: [0] [ 200/1251] eta: 0:11:15 lr: 0.000010 loss: 2.8055 (2.8799) time: 0.5236 data: 0.0002 max mem: 29510
Epoch: [0] [ 210/1251] eta: 0:11:03 lr: 0.000010 loss: 2.9426 (2.8697) time: 0.5239 data: 0.0002 max mem: 29510
Epoch: [0] [ 220/1251] eta: 0:10:51 lr: 0.000010 loss: 2.7032 (2.8608) time: 0.5235 data: 0.0003 max mem: 29510
Epoch: [0] [ 230/1251] eta: 0:10:40 lr: 0.000010 loss: 2.8248 (2.8597) time: 0.5223 data: 0.0002 max mem: 29510
Epoch: [0] [ 240/1251] eta: 0:10:29 lr: 0.000010 loss: 2.6884 (2.8469) time: 0.5220 data: 0.0002 max mem: 29510
Epoch: [0] [ 250/1251] eta: 0:10:19 lr: 0.000010 loss: 2.7931 (2.8441) time: 0.5196 data: 0.0002 max mem: 29510
Epoch: [0] [ 260/1251] eta: 0:10:09 lr: 0.000010 loss: 2.8007 (2.8327) time: 0.5231 data: 0.0002 max mem: 29510
Epoch: [0] [ 270/1251] eta: 0:10:00 lr: 0.000010 loss: 2.5055 (2.8249) time: 0.5263 data: 0.0003 max mem: 29510
Epoch: [0] [ 280/1251] eta: 0:09:50 lr: 0.000010 loss: 2.6828 (2.8249) time: 0.5230 data: 0.0002 max mem: 29510
Epoch: [0] [ 290/1251] eta: 0:09:41 lr: 0.000010 loss: 2.9307 (2.8207) time: 0.5179 data: 0.0002 max mem: 29510
Epoch: [0] [ 300/1251] eta: 0:09:32 lr: 0.000010 loss: 2.6761 (2.8166) time: 0.5164 data: 0.0002 max mem: 29510
Epoch: [0] [ 310/1251] eta: 0:09:24 lr: 0.000010 loss: 2.8388 (2.8206) time: 0.5190 data: 0.0002 max mem: 29510
Epoch: [0] [ 320/1251] eta: 0:09:16 lr: 0.000010 loss: 2.8388 (2.8201) time: 0.5207 data: 0.0003 max mem: 29510
Epoch: [0] [ 330/1251] eta: 0:09:08 lr: 0.000010 loss: 2.7737 (2.8093) time: 0.5248 data: 0.0003 max mem: 29510
Epoch: [0] [ 340/1251] eta: 0:09:00 lr: 0.000010 loss: 2.9173 (2.8142) time: 0.5241 data: 0.0002 max mem: 29510
Epoch: [0] [ 350/1251] eta: 0:08:52 lr: 0.000010 loss: 2.9891 (2.8119) time: 0.5227 data: 0.0002 max mem: 29510
Epoch: [0] [ 360/1251] eta: 0:08:44 lr: 0.000010 loss: 2.8655 (2.8122) time: 0.5163 data: 0.0002 max mem: 29510
Epoch: [0] [ 370/1251] eta: 0:08:37 lr: 0.000010 loss: 2.9517 (2.8125) time: 0.5177 data: 0.0002 max mem: 29510
Epoch: [0] [ 380/1251] eta: 0:08:29 lr: 0.000010 loss: 2.9991 (2.8162) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [0] [ 390/1251] eta: 0:08:22 lr: 0.000010 loss: 2.8947 (2.8077) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [0] [ 400/1251] eta: 0:08:15 lr: 0.000010 loss: 2.7214 (2.8047) time: 0.5246 data: 0.0002 max mem: 29510
Epoch: [0] [ 410/1251] eta: 0:08:08 lr: 0.000010 loss: 2.8240 (2.8037) time: 0.5206 data: 0.0002 max mem: 29510
Epoch: [0] [ 420/1251] eta: 0:08:01 lr: 0.000010 loss: 3.0299 (2.8079) time: 0.5241 data: 0.0002 max mem: 29510
Epoch: [0] [ 430/1251] eta: 0:07:54 lr: 0.000010 loss: 2.9758 (2.8092) time: 0.5214 data: 0.0002 max mem: 29510
Epoch: [0] [ 440/1251] eta: 0:07:47 lr: 0.000010 loss: 2.9547 (2.8080) time: 0.5196 data: 0.0002 max mem: 29510
Epoch: [0] [ 450/1251] eta: 0:07:40 lr: 0.000010 loss: 2.8188 (2.8047) time: 0.5161 data: 0.0002 max mem: 29510
Epoch: [0] [ 460/1251] eta: 0:07:34 lr: 0.000010 loss: 2.6399 (2.7961) time: 0.5157 data: 0.0002 max mem: 29510
Epoch: [0] [ 470/1251] eta: 0:07:27 lr: 0.000010 loss: 2.6884 (2.7975) time: 0.5187 data: 0.0002 max mem: 29510
Epoch: [0] [ 480/1251] eta: 0:07:20 lr: 0.000010 loss: 2.8993 (2.7998) time: 0.5167 data: 0.0002 max mem: 29510
Epoch: [0] [ 490/1251] eta: 0:07:14 lr: 0.000010 loss: 2.8878 (2.8005) time: 0.5195 data: 0.0002 max mem: 29510
Epoch: [0] [ 500/1251] eta: 0:07:08 lr: 0.000010 loss: 2.8284 (2.8012) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [0] [ 510/1251] eta: 0:07:01 lr: 0.000010 loss: 2.8284 (2.8013) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [0] [ 520/1251] eta: 0:06:55 lr: 0.000010 loss: 2.8527 (2.7978) time: 0.5245 data: 0.0002 max mem: 29510
Epoch: [0] [ 530/1251] eta: 0:06:49 lr: 0.000010 loss: 2.7476 (2.7929) time: 0.5336 data: 0.0002 max mem: 29510
Epoch: [0] [ 540/1251] eta: 0:06:42 lr: 0.000010 loss: 2.8891 (2.7964) time: 0.5287 data: 0.0002 max mem: 29510
Epoch: [0] [ 550/1251] eta: 0:06:36 lr: 0.000010 loss: 2.8195 (2.7892) time: 0.5232 data: 0.0002 max mem: 29510
Epoch: [0] [ 560/1251] eta: 0:06:30 lr: 0.000010 loss: 2.5184 (2.7887) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [0] [ 570/1251] eta: 0:06:24 lr: 0.000010 loss: 2.7163 (2.7884) time: 0.5207 data: 0.0002 max mem: 29510
Epoch: [0] [ 580/1251] eta: 0:06:18 lr: 0.000010 loss: 2.6515 (2.7865) time: 0.5175 data: 0.0002 max mem: 29510
Epoch: [0] [ 590/1251] eta: 0:06:11 lr: 0.000010 loss: 2.7255 (2.7883) time: 0.5148 data: 0.0002 max mem: 29510
Epoch: [0] [ 600/1251] eta: 0:06:05 lr: 0.000010 loss: 2.7985 (2.7859) time: 0.5199 data: 0.0002 max mem: 29510
Epoch: [0] [ 610/1251] eta: 0:05:59 lr: 0.000010 loss: 2.7985 (2.7859) time: 0.5185 data: 0.0002 max mem: 29510
Epoch: [0] [ 620/1251] eta: 0:05:53 lr: 0.000010 loss: 2.8090 (2.7847) time: 0.5227 data: 0.0002 max mem: 29510
Epoch: [0] [ 630/1251] eta: 0:05:47 lr: 0.000010 loss: 2.8277 (2.7862) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [0] [ 640/1251] eta: 0:05:41 lr: 0.000010 loss: 2.7510 (2.7865) time: 0.5214 data: 0.0002 max mem: 29510
Epoch: [0] [ 650/1251] eta: 0:05:35 lr: 0.000010 loss: 2.6925 (2.7841) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [0] [ 660/1251] eta: 0:05:30 lr: 0.000010 loss: 2.8323 (2.7838) time: 0.5244 data: 0.0002 max mem: 29510
Epoch: [0] [ 670/1251] eta: 0:05:24 lr: 0.000010 loss: 2.8030 (2.7800) time: 0.5229 data: 0.0003 max mem: 29510
Epoch: [0] [ 680/1251] eta: 0:05:18 lr: 0.000010 loss: 2.2492 (2.7764) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [0] [ 690/1251] eta: 0:05:12 lr: 0.000010 loss: 2.7411 (2.7760) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [0] [ 700/1251] eta: 0:05:06 lr: 0.000010 loss: 2.7282 (2.7738) time: 0.5221 data: 0.0002 max mem: 29510
Epoch: [0] [ 710/1251] eta: 0:05:00 lr: 0.000010 loss: 2.7282 (2.7719) time: 0.5274 data: 0.0002 max mem: 29510
Epoch: [0] [ 720/1251] eta: 0:04:54 lr: 0.000010 loss: 2.8265 (2.7741) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [0] [ 730/1251] eta: 0:04:49 lr: 0.000010 loss: 3.0064 (2.7768) time: 0.5242 data: 0.0003 max mem: 29510
Epoch: [0] [ 740/1251] eta: 0:04:43 lr: 0.000010 loss: 3.0064 (2.7760) time: 0.5272 data: 0.0003 max mem: 29510
Epoch: [0] [ 750/1251] eta: 0:04:37 lr: 0.000010 loss: 2.8361 (2.7736) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [0] [ 760/1251] eta: 0:04:31 lr: 0.000010 loss: 2.8499 (2.7723) time: 0.5230 data: 0.0002 max mem: 29510
Epoch: [0] [ 770/1251] eta: 0:04:26 lr: 0.000010 loss: 2.8424 (2.7733) time: 0.5244 data: 0.0002 max mem: 29510
Epoch: [0] [ 780/1251] eta: 0:04:20 lr: 0.000010 loss: 2.8424 (2.7749) time: 0.5232 data: 0.0002 max mem: 29510
Epoch: [0] [ 790/1251] eta: 0:04:14 lr: 0.000010 loss: 2.8446 (2.7756) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [0] [ 800/1251] eta: 0:04:09 lr: 0.000010 loss: 2.8446 (2.7758) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [0] [ 810/1251] eta: 0:04:03 lr: 0.000010 loss: 2.9037 (2.7769) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [0] [ 820/1251] eta: 0:03:57 lr: 0.000010 loss: 2.9096 (2.7781) time: 0.5195 data: 0.0002 max mem: 29510
Epoch: [0] [ 830/1251] eta: 0:03:52 lr: 0.000010 loss: 2.8601 (2.7773) time: 0.5185 data: 0.0002 max mem: 29510
Epoch: [0] [ 840/1251] eta: 0:03:46 lr: 0.000010 loss: 2.7029 (2.7750) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [0] [ 850/1251] eta: 0:03:40 lr: 0.000010 loss: 2.5984 (2.7743) time: 0.5240 data: 0.0002 max mem: 29510
Epoch: [0] [ 860/1251] eta: 0:03:35 lr: 0.000010 loss: 2.8014 (2.7746) time: 0.5204 data: 0.0003 max mem: 29510
Epoch: [0] [ 870/1251] eta: 0:03:29 lr: 0.000010 loss: 2.8014 (2.7733) time: 0.5208 data: 0.0002 max mem: 29510
Epoch: [0] [ 880/1251] eta: 0:03:23 lr: 0.000010 loss: 2.6490 (2.7725) time: 0.5274 data: 0.0002 max mem: 29510
Epoch: [0] [ 890/1251] eta: 0:03:18 lr: 0.000010 loss: 2.6490 (2.7709) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [0] [ 900/1251] eta: 0:03:12 lr: 0.000010 loss: 2.8285 (2.7708) time: 0.5169 data: 0.0002 max mem: 29510
Epoch: [0] [ 910/1251] eta: 0:03:07 lr: 0.000010 loss: 2.8556 (2.7712) time: 0.5165 data: 0.0002 max mem: 29510
Epoch: [0] [ 920/1251] eta: 0:03:01 lr: 0.000010 loss: 2.7325 (2.7694) time: 0.5177 data: 0.0002 max mem: 29510
Epoch: [0] [ 930/1251] eta: 0:02:55 lr: 0.000010 loss: 2.7085 (2.7686) time: 0.5187 data: 0.0002 max mem: 29510
Epoch: [0] [ 940/1251] eta: 0:02:50 lr: 0.000010 loss: 2.6869 (2.7669) time: 0.5204 data: 0.0002 max mem: 29510
Epoch: [0] [ 950/1251] eta: 0:02:44 lr: 0.000010 loss: 2.7953 (2.7684) time: 0.5198 data: 0.0002 max mem: 29510
Epoch: [0] [ 960/1251] eta: 0:02:39 lr: 0.000010 loss: 2.9683 (2.7679) time: 0.5212 data: 0.0003 max mem: 29510
Epoch: [0] [ 970/1251] eta: 0:02:33 lr: 0.000010 loss: 2.8983 (2.7665) time: 0.5239 data: 0.0002 max mem: 29510
Epoch: [0] [ 980/1251] eta: 0:02:28 lr: 0.000010 loss: 2.8918 (2.7684) time: 0.5202 data: 0.0002 max mem: 29510
Epoch: [0] [ 990/1251] eta: 0:02:22 lr: 0.000010 loss: 2.8643 (2.7662) time: 0.5168 data: 0.0002 max mem: 29510
Epoch: [0] [1000/1251] eta: 0:02:17 lr: 0.000010 loss: 2.7876 (2.7652) time: 0.5220 data: 0.0002 max mem: 29510
Epoch: [0] [1010/1251] eta: 0:02:11 lr: 0.000010 loss: 2.7876 (2.7650) time: 0.5214 data: 0.0002 max mem: 29510
Epoch: [0] [1020/1251] eta: 0:02:06 lr: 0.000010 loss: 2.8585 (2.7651) time: 0.5191 data: 0.0002 max mem: 29510
Epoch: [0] [1030/1251] eta: 0:02:00 lr: 0.000010 loss: 2.6639 (2.7640) time: 0.5212 data: 0.0002 max mem: 29510
Epoch: [0] [1040/1251] eta: 0:01:55 lr: 0.000010 loss: 2.6547 (2.7648) time: 0.5227 data: 0.0002 max mem: 29510
Epoch: [0] [1050/1251] eta: 0:01:49 lr: 0.000010 loss: 2.8846 (2.7663) time: 0.5245 data: 0.0002 max mem: 29510
Epoch: [0] [1060/1251] eta: 0:01:44 lr: 0.000010 loss: 2.9037 (2.7663) time: 0.5206 data: 0.0002 max mem: 29510
Epoch: [0] [1070/1251] eta: 0:01:38 lr: 0.000010 loss: 2.7546 (2.7640) time: 0.5181 data: 0.0002 max mem: 29510
Epoch: [0] [1080/1251] eta: 0:01:33 lr: 0.000010 loss: 2.7161 (2.7636) time: 0.5167 data: 0.0003 max mem: 29510
Epoch: [0] [1090/1251] eta: 0:01:27 lr: 0.000010 loss: 2.8984 (2.7651) time: 0.5163 data: 0.0019 max mem: 29510
Epoch: [0] [1100/1251] eta: 0:01:22 lr: 0.000010 loss: 2.8984 (2.7651) time: 0.5193 data: 0.0019 max mem: 29510
Epoch: [0] [1110/1251] eta: 0:01:16 lr: 0.000010 loss: 2.8638 (2.7627) time: 0.5198 data: 0.0002 max mem: 29510
Epoch: [0] [1120/1251] eta: 0:01:11 lr: 0.000010 loss: 2.8510 (2.7635) time: 0.5176 data: 0.0002 max mem: 29510
Epoch: [0] [1130/1251] eta: 0:01:05 lr: 0.000010 loss: 2.8359 (2.7626) time: 0.5156 data: 0.0003 max mem: 29510
Epoch: [0] [1140/1251] eta: 0:01:00 lr: 0.000010 loss: 2.7568 (2.7635) time: 0.5180 data: 0.0002 max mem: 29510
Epoch: [0] [1150/1251] eta: 0:00:54 lr: 0.000010 loss: 2.9646 (2.7655) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [0] [1160/1251] eta: 0:00:49 lr: 0.000010 loss: 2.9446 (2.7662) time: 0.5221 data: 0.0002 max mem: 29510
Epoch: [0] [1170/1251] eta: 0:00:43 lr: 0.000010 loss: 2.8811 (2.7663) time: 0.5183 data: 0.0002 max mem: 29510
Epoch: [0] [1180/1251] eta: 0:00:38 lr: 0.000010 loss: 2.8811 (2.7658) time: 0.5161 data: 0.0002 max mem: 29510
Epoch: [0] [1190/1251] eta: 0:00:33 lr: 0.000010 loss: 2.7965 (2.7635) time: 0.5182 data: 0.0002 max mem: 29510
Epoch: [0] [1200/1251] eta: 0:00:27 lr: 0.000010 loss: 2.6947 (2.7633) time: 0.5231 data: 0.0002 max mem: 29510
Epoch: [0] [1210/1251] eta: 0:00:22 lr: 0.000010 loss: 2.8885 (2.7622) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [0] [1220/1251] eta: 0:00:16 lr: 0.000010 loss: 2.9472 (2.7637) time: 0.5185 data: 0.0002 max mem: 29510
Epoch: [0] [1230/1251] eta: 0:00:11 lr: 0.000010 loss: 2.8201 (2.7614) time: 0.5191 data: 0.0002 max mem: 29510
Epoch: [0] [1240/1251] eta: 0:00:05 lr: 0.000010 loss: 2.7345 (2.7609) time: 0.5081 data: 0.0008 max mem: 29510
Epoch: [0] [1250/1251] eta: 0:00:00 lr: 0.000010 loss: 2.7502 (2.7602) time: 0.4937 data: 0.0008 max mem: 29510
Epoch: [0] Total time: 0:11:16 (0.5409 s / it)
Averaged stats: lr: 0.000010 loss: 2.7502 (2.7547)
Test: [ 0/25] eta: 0:05:56 loss: 0.6451 (0.6451) acc1: 90.8000 (90.8000) acc5: 98.8000 (98.8000) time: 14.2705 data: 6.1541 max mem: 29510
Test: [10/25] eta: 0:00:23 loss: 0.7874 (0.7775) acc1: 86.8000 (86.5091) acc5: 97.6000 (97.8545) time: 1.5609 data: 0.5597 max mem: 29510
Test: [20/25] eta: 0:00:04 loss: 0.9033 (0.9004) acc1: 81.6000 (83.2381) acc5: 96.4000 (96.7619) time: 0.2978 data: 0.0002 max mem: 29510
Test: [24/25] eta: 0:00:00 loss: 0.9600 (0.9119) acc1: 80.8000 (82.7680) acc5: 96.4000 (96.7200) time: 0.2984 data: 0.0002 max mem: 29510
Test: Total time: 0:00:21 (0.8641 s / it)
* Acc@1 83.270 Acc@5 96.696 loss 0.904
Accuracy of the network on the 50000 test images: 83.3%
Max accuracy: 83.27%
Epoch: [1] [ 0/1251] eta: 1:27:50 lr: 0.000010 loss: 2.5545 (2.5545) time: 4.2131 data: 3.3134 max mem: 29510
Epoch: [1] [ 10/1251] eta: 0:17:51 lr: 0.000010 loss: 2.7343 (2.6843) time: 0.8635 data: 0.3015 max mem: 29510
Epoch: [1] [ 20/1251] eta: 0:14:28 lr: 0.000010 loss: 2.7343 (2.6308) time: 0.5302 data: 0.0002 max mem: 29510
Epoch: [1] [ 30/1251] eta: 0:13:13 lr: 0.000010 loss: 2.8428 (2.6993) time: 0.5327 data: 0.0002 max mem: 29510
Epoch: [1] [ 40/1251] eta: 0:12:30 lr: 0.000010 loss: 2.8191 (2.6737) time: 0.5301 data: 0.0002 max mem: 29510
Epoch: [1] [ 50/1251] eta: 0:12:01 lr: 0.000010 loss: 2.7445 (2.6589) time: 0.5248 data: 0.0002 max mem: 29510
Epoch: [1] [ 60/1251] eta: 0:11:42 lr: 0.000010 loss: 2.7445 (2.6501) time: 0.5287 data: 0.0003 max mem: 29510
Epoch: [1] [ 70/1251] eta: 0:11:27 lr: 0.000010 loss: 2.6107 (2.6319) time: 0.5335 data: 0.0003 max mem: 29510
Epoch: [1] [ 80/1251] eta: 0:11:12 lr: 0.000010 loss: 2.7170 (2.6594) time: 0.5272 data: 0.0003 max mem: 29510
Epoch: [1] [ 90/1251] eta: 0:11:00 lr: 0.000010 loss: 2.9783 (2.6874) time: 0.5212 data: 0.0002 max mem: 29510
Epoch: [1] [ 100/1251] eta: 0:10:49 lr: 0.000010 loss: 2.9634 (2.7003) time: 0.5242 data: 0.0002 max mem: 29510
Epoch: [1] [ 110/1251] eta: 0:10:40 lr: 0.000010 loss: 2.9279 (2.7060) time: 0.5265 data: 0.0003 max mem: 29510
Epoch: [1] [ 120/1251] eta: 0:10:30 lr: 0.000010 loss: 2.8733 (2.7132) time: 0.5235 data: 0.0003 max mem: 29510
Epoch: [1] [ 130/1251] eta: 0:10:22 lr: 0.000010 loss: 2.8468 (2.7123) time: 0.5256 data: 0.0003 max mem: 29510
Epoch: [1] [ 140/1251] eta: 0:10:14 lr: 0.000010 loss: 2.8086 (2.7144) time: 0.5221 data: 0.0003 max mem: 29510
Epoch: [1] [ 150/1251] eta: 0:10:06 lr: 0.000010 loss: 2.7791 (2.7086) time: 0.5227 data: 0.0002 max mem: 29510
Epoch: [1] [ 160/1251] eta: 0:09:59 lr: 0.000010 loss: 2.7772 (2.7134) time: 0.5272 data: 0.0002 max mem: 29510
Epoch: [1] [ 170/1251] eta: 0:09:52 lr: 0.000010 loss: 2.8623 (2.7276) time: 0.5234 data: 0.0002 max mem: 29510
Epoch: [1] [ 180/1251] eta: 0:09:45 lr: 0.000010 loss: 2.9492 (2.7323) time: 0.5214 data: 0.0002 max mem: 29510
Epoch: [1] [ 190/1251] eta: 0:09:38 lr: 0.000010 loss: 2.8278 (2.7358) time: 0.5178 data: 0.0002 max mem: 29510
Epoch: [1] [ 200/1251] eta: 0:09:31 lr: 0.000010 loss: 2.7343 (2.7215) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [1] [ 210/1251] eta: 0:09:25 lr: 0.000010 loss: 2.2755 (2.7071) time: 0.5266 data: 0.0002 max mem: 29510
Epoch: [1] [ 220/1251] eta: 0:09:19 lr: 0.000010 loss: 2.3451 (2.7061) time: 0.5267 data: 0.0002 max mem: 29510
Epoch: [1] [ 230/1251] eta: 0:09:12 lr: 0.000010 loss: 2.9020 (2.7180) time: 0.5226 data: 0.0002 max mem: 29510
Epoch: [1] [ 240/1251] eta: 0:09:06 lr: 0.000010 loss: 2.9020 (2.7202) time: 0.5218 data: 0.0002 max mem: 29510
Epoch: [1] [ 250/1251] eta: 0:09:00 lr: 0.000010 loss: 2.8078 (2.7227) time: 0.5256 data: 0.0002 max mem: 29510
Epoch: [1] [ 260/1251] eta: 0:08:54 lr: 0.000010 loss: 2.8575 (2.7253) time: 0.5249 data: 0.0002 max mem: 29510
Epoch: [1] [ 270/1251] eta: 0:08:48 lr: 0.000010 loss: 2.8345 (2.7296) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [1] [ 280/1251] eta: 0:08:42 lr: 0.000010 loss: 2.8620 (2.7327) time: 0.5260 data: 0.0002 max mem: 29510
Epoch: [1] [ 290/1251] eta: 0:08:36 lr: 0.000010 loss: 2.6989 (2.7252) time: 0.5258 data: 0.0003 max mem: 29510
Epoch: [1] [ 300/1251] eta: 0:08:31 lr: 0.000010 loss: 2.6774 (2.7249) time: 0.5259 data: 0.0003 max mem: 29510
Epoch: [1] [ 310/1251] eta: 0:08:25 lr: 0.000010 loss: 2.7765 (2.7230) time: 0.5250 data: 0.0002 max mem: 29510
Epoch: [1] [ 320/1251] eta: 0:08:19 lr: 0.000010 loss: 2.6410 (2.7192) time: 0.5230 data: 0.0002 max mem: 29510
Epoch: [1] [ 330/1251] eta: 0:08:13 lr: 0.000010 loss: 2.7432 (2.7191) time: 0.5219 data: 0.0002 max mem: 29510
Epoch: [1] [ 340/1251] eta: 0:08:08 lr: 0.000010 loss: 2.8015 (2.7184) time: 0.5249 data: 0.0003 max mem: 29510
Epoch: [1] [ 350/1251] eta: 0:08:02 lr: 0.000010 loss: 2.8015 (2.7175) time: 0.5247 data: 0.0003 max mem: 29510
Epoch: [1] [ 360/1251] eta: 0:07:56 lr: 0.000010 loss: 2.7165 (2.7164) time: 0.5181 data: 0.0002 max mem: 29510
Epoch: [1] [ 370/1251] eta: 0:07:51 lr: 0.000010 loss: 2.5509 (2.7093) time: 0.5198 data: 0.0002 max mem: 29510
Epoch: [1] [ 380/1251] eta: 0:07:45 lr: 0.000010 loss: 2.5509 (2.7095) time: 0.5214 data: 0.0002 max mem: 29510
Epoch: [1] [ 390/1251] eta: 0:07:39 lr: 0.000010 loss: 2.7906 (2.7111) time: 0.5223 data: 0.0002 max mem: 29510
Epoch: [1] [ 400/1251] eta: 0:07:34 lr: 0.000010 loss: 2.9084 (2.7119) time: 0.5304 data: 0.0003 max mem: 29510
Epoch: [1] [ 410/1251] eta: 0:07:28 lr: 0.000010 loss: 2.8651 (2.7135) time: 0.5267 data: 0.0002 max mem: 29510
Epoch: [1] [ 420/1251] eta: 0:07:23 lr: 0.000010 loss: 2.7880 (2.7105) time: 0.5165 data: 0.0003 max mem: 29510
Epoch: [1] [ 430/1251] eta: 0:07:17 lr: 0.000010 loss: 2.6672 (2.7086) time: 0.5201 data: 0.0003 max mem: 29510
Epoch: [1] [ 440/1251] eta: 0:07:12 lr: 0.000010 loss: 2.7086 (2.7090) time: 0.5317 data: 0.0002 max mem: 29510
Epoch: [1] [ 450/1251] eta: 0:07:06 lr: 0.000010 loss: 2.6901 (2.7074) time: 0.5300 data: 0.0002 max mem: 29510
Epoch: [1] [ 460/1251] eta: 0:07:01 lr: 0.000010 loss: 2.6936 (2.7067) time: 0.5232 data: 0.0002 max mem: 29510
Epoch: [1] [ 470/1251] eta: 0:06:55 lr: 0.000010 loss: 2.7154 (2.7016) time: 0.5212 data: 0.0002 max mem: 29510
Epoch: [1] [ 480/1251] eta: 0:06:50 lr: 0.000010 loss: 2.7154 (2.7018) time: 0.5236 data: 0.0002 max mem: 29510
Epoch: [1] [ 490/1251] eta: 0:06:45 lr: 0.000010 loss: 2.7761 (2.7021) time: 0.5282 data: 0.0002 max mem: 29510
Epoch: [1] [ 500/1251] eta: 0:06:39 lr: 0.000010 loss: 2.8353 (2.7022) time: 0.5245 data: 0.0002 max mem: 29510
Epoch: [1] [ 510/1251] eta: 0:06:34 lr: 0.000010 loss: 2.8367 (2.7037) time: 0.5235 data: 0.0002 max mem: 29510
Epoch: [1] [ 520/1251] eta: 0:06:28 lr: 0.000010 loss: 2.9407 (2.7096) time: 0.5242 data: 0.0003 max mem: 29510
Epoch: [1] [ 530/1251] eta: 0:06:23 lr: 0.000010 loss: 3.0191 (2.7127) time: 0.5200 data: 0.0003 max mem: 29510
Epoch: [1] [ 540/1251] eta: 0:06:17 lr: 0.000010 loss: 2.9282 (2.7135) time: 0.5242 data: 0.0002 max mem: 29510
Epoch: [1] [ 550/1251] eta: 0:06:12 lr: 0.000010 loss: 2.6278 (2.7128) time: 0.5312 data: 0.0002 max mem: 29510
Epoch: [1] [ 560/1251] eta: 0:06:07 lr: 0.000010 loss: 2.6824 (2.7120) time: 0.5243 data: 0.0002 max mem: 29510
Epoch: [1] [ 570/1251] eta: 0:06:01 lr: 0.000010 loss: 2.7418 (2.7118) time: 0.5195 data: 0.0002 max mem: 29510
Epoch: [1] [ 580/1251] eta: 0:05:56 lr: 0.000010 loss: 2.8165 (2.7109) time: 0.5200 data: 0.0002 max mem: 29510
Epoch: [1] [ 590/1251] eta: 0:05:50 lr: 0.000010 loss: 2.8147 (2.7089) time: 0.5211 data: 0.0002 max mem: 29510
Epoch: [1] [ 600/1251] eta: 0:05:45 lr: 0.000010 loss: 2.8872 (2.7086) time: 0.5245 data: 0.0002 max mem: 29510
Epoch: [1] [ 610/1251] eta: 0:05:40 lr: 0.000010 loss: 2.8979 (2.7093) time: 0.5256 data: 0.0002 max mem: 29510
Epoch: [1] [ 620/1251] eta: 0:05:34 lr: 0.000010 loss: 2.8734 (2.7114) time: 0.5240 data: 0.0003 max mem: 29510
Epoch: [1] [ 630/1251] eta: 0:05:29 lr: 0.000010 loss: 2.8076 (2.7091) time: 0.5263 data: 0.0003 max mem: 29510
Epoch: [1] [ 640/1251] eta: 0:05:24 lr: 0.000010 loss: 2.8654 (2.7096) time: 0.5286 data: 0.0002 max mem: 29510
Epoch: [1] [ 650/1251] eta: 0:05:18 lr: 0.000010 loss: 2.8924 (2.7095) time: 0.5260 data: 0.0002 max mem: 29510
Epoch: [1] [ 660/1251] eta: 0:05:13 lr: 0.000010 loss: 2.7166 (2.7087) time: 0.5279 data: 0.0003 max mem: 29510
Epoch: [1] [ 670/1251] eta: 0:05:07 lr: 0.000010 loss: 2.7074 (2.7085) time: 0.5258 data: 0.0002 max mem: 29510
Epoch: [1] [ 680/1251] eta: 0:05:02 lr: 0.000010 loss: 2.8757 (2.7124) time: 0.5183 data: 0.0002 max mem: 29510
Epoch: [1] [ 690/1251] eta: 0:04:57 lr: 0.000010 loss: 2.9263 (2.7101) time: 0.5217 data: 0.0003 max mem: 29510
Epoch: [1] [ 700/1251] eta: 0:04:51 lr: 0.000010 loss: 2.7574 (2.7117) time: 0.5227 data: 0.0003 max mem: 29510
Epoch: [1] [ 710/1251] eta: 0:04:46 lr: 0.000010 loss: 2.9283 (2.7123) time: 0.5191 data: 0.0003 max mem: 29510
Epoch: [1] [ 720/1251] eta: 0:04:41 lr: 0.000010 loss: 2.9430 (2.7134) time: 0.5236 data: 0.0002 max mem: 29510
Epoch: [1] [ 730/1251] eta: 0:04:35 lr: 0.000010 loss: 2.9407 (2.7144) time: 0.5282 data: 0.0002 max mem: 29510
Epoch: [1] [ 740/1251] eta: 0:04:30 lr: 0.000010 loss: 2.8999 (2.7146) time: 0.5256 data: 0.0003 max mem: 29510
Epoch: [1] [ 750/1251] eta: 0:04:25 lr: 0.000010 loss: 2.8457 (2.7160) time: 0.5264 data: 0.0003 max mem: 29510
Epoch: [1] [ 760/1251] eta: 0:04:19 lr: 0.000010 loss: 2.8397 (2.7178) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [1] [ 770/1251] eta: 0:04:14 lr: 0.000010 loss: 2.7140 (2.7177) time: 0.5156 data: 0.0002 max mem: 29510
Epoch: [1] [ 780/1251] eta: 0:04:09 lr: 0.000010 loss: 2.6054 (2.7139) time: 0.5165 data: 0.0002 max mem: 29510
Epoch: [1] [ 790/1251] eta: 0:04:03 lr: 0.000010 loss: 2.6389 (2.7160) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [1] [ 800/1251] eta: 0:03:58 lr: 0.000010 loss: 2.9175 (2.7157) time: 0.5231 data: 0.0002 max mem: 29510
Epoch: [1] [ 810/1251] eta: 0:03:53 lr: 0.000010 loss: 2.7703 (2.7152) time: 0.5250 data: 0.0002 max mem: 29510
Epoch: [1] [ 820/1251] eta: 0:03:47 lr: 0.000010 loss: 2.5652 (2.7127) time: 0.5218 data: 0.0002 max mem: 29510
Epoch: [1] [ 830/1251] eta: 0:03:42 lr: 0.000010 loss: 2.9312 (2.7160) time: 0.5171 data: 0.0002 max mem: 29510
Epoch: [1] [ 840/1251] eta: 0:03:37 lr: 0.000010 loss: 2.9525 (2.7161) time: 0.5200 data: 0.0002 max mem: 29510
Epoch: [1] [ 850/1251] eta: 0:03:31 lr: 0.000010 loss: 2.7623 (2.7158) time: 0.5221 data: 0.0002 max mem: 29510
Epoch: [1] [ 860/1251] eta: 0:03:26 lr: 0.000010 loss: 2.7112 (2.7136) time: 0.5257 data: 0.0002 max mem: 29510
Epoch: [1] [ 870/1251] eta: 0:03:21 lr: 0.000010 loss: 2.8249 (2.7143) time: 0.5265 data: 0.0002 max mem: 29510
Epoch: [1] [ 880/1251] eta: 0:03:15 lr: 0.000010 loss: 2.8835 (2.7142) time: 0.5173 data: 0.0002 max mem: 29510
Epoch: [1] [ 890/1251] eta: 0:03:10 lr: 0.000010 loss: 2.7819 (2.7140) time: 0.5186 data: 0.0002 max mem: 29510
Epoch: [1] [ 900/1251] eta: 0:03:05 lr: 0.000010 loss: 2.8592 (2.7144) time: 0.5206 data: 0.0002 max mem: 29510
Epoch: [1] [ 910/1251] eta: 0:03:00 lr: 0.000010 loss: 2.8019 (2.7132) time: 0.5175 data: 0.0003 max mem: 29510
Epoch: [1] [ 920/1251] eta: 0:02:54 lr: 0.000010 loss: 2.7564 (2.7131) time: 0.5193 data: 0.0003 max mem: 29510
Epoch: [1] [ 930/1251] eta: 0:02:49 lr: 0.000010 loss: 2.7603 (2.7123) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [1] [ 940/1251] eta: 0:02:44 lr: 0.000010 loss: 2.8024 (2.7118) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [1] [ 950/1251] eta: 0:02:38 lr: 0.000010 loss: 2.7831 (2.7118) time: 0.5169 data: 0.0002 max mem: 29510
Epoch: [1] [ 960/1251] eta: 0:02:33 lr: 0.000010 loss: 2.7447 (2.7100) time: 0.5193 data: 0.0002 max mem: 29510
Epoch: [1] [ 970/1251] eta: 0:02:28 lr: 0.000010 loss: 2.8250 (2.7131) time: 0.5211 data: 0.0002 max mem: 29510
Epoch: [1] [ 980/1251] eta: 0:02:22 lr: 0.000010 loss: 2.9572 (2.7135) time: 0.5178 data: 0.0003 max mem: 29510
Epoch: [1] [ 990/1251] eta: 0:02:17 lr: 0.000010 loss: 2.8331 (2.7145) time: 0.5190 data: 0.0002 max mem: 29510
Epoch: [1] [1000/1251] eta: 0:02:12 lr: 0.000010 loss: 2.7167 (2.7132) time: 0.5197 data: 0.0002 max mem: 29510
Epoch: [1] [1010/1251] eta: 0:02:07 lr: 0.000010 loss: 2.5027 (2.7108) time: 0.5226 data: 0.0002 max mem: 29510
Epoch: [1] [1020/1251] eta: 0:02:01 lr: 0.000010 loss: 2.6094 (2.7100) time: 0.5249 data: 0.0002 max mem: 29510
Epoch: [1] [1030/1251] eta: 0:01:56 lr: 0.000010 loss: 2.7917 (2.7107) time: 0.5193 data: 0.0003 max mem: 29510
Epoch: [1] [1040/1251] eta: 0:01:51 lr: 0.000010 loss: 2.7917 (2.7117) time: 0.5188 data: 0.0002 max mem: 29510
Epoch: [1] [1050/1251] eta: 0:01:45 lr: 0.000010 loss: 2.6927 (2.7108) time: 0.5230 data: 0.0002 max mem: 29510
Epoch: [1] [1060/1251] eta: 0:01:40 lr: 0.000010 loss: 2.6224 (2.7091) time: 0.5209 data: 0.0002 max mem: 29510
Epoch: [1] [1070/1251] eta: 0:01:35 lr: 0.000010 loss: 2.5953 (2.7078) time: 0.5160 data: 0.0002 max mem: 29510
Epoch: [1] [1080/1251] eta: 0:01:30 lr: 0.000010 loss: 2.7588 (2.7081) time: 0.5133 data: 0.0002 max mem: 29510
Epoch: [1] [1090/1251] eta: 0:01:24 lr: 0.000010 loss: 2.8124 (2.7080) time: 0.5135 data: 0.0003 max mem: 29510
Epoch: [1] [1100/1251] eta: 0:01:19 lr: 0.000010 loss: 2.8124 (2.7081) time: 0.5186 data: 0.0002 max mem: 29510
Epoch: [1] [1110/1251] eta: 0:01:14 lr: 0.000010 loss: 2.9080 (2.7097) time: 0.5140 data: 0.0002 max mem: 29510
Epoch: [1] [1120/1251] eta: 0:01:08 lr: 0.000010 loss: 2.9546 (2.7099) time: 0.5159 data: 0.0002 max mem: 29510
Epoch: [1] [1130/1251] eta: 0:01:03 lr: 0.000010 loss: 2.9546 (2.7116) time: 0.5200 data: 0.0002 max mem: 29510
Epoch: [1] [1140/1251] eta: 0:00:58 lr: 0.000010 loss: 2.9929 (2.7117) time: 0.5230 data: 0.0002 max mem: 29510
Epoch: [1] [1150/1251] eta: 0:00:53 lr: 0.000010 loss: 2.9096 (2.7117) time: 0.5242 data: 0.0002 max mem: 29510
Epoch: [1] [1160/1251] eta: 0:00:47 lr: 0.000010 loss: 2.9367 (2.7131) time: 0.5153 data: 0.0002 max mem: 29510
Epoch: [1] [1170/1251] eta: 0:00:42 lr: 0.000010 loss: 2.8733 (2.7140) time: 0.5180 data: 0.0002 max mem: 29510
Epoch: [1] [1180/1251] eta: 0:00:37 lr: 0.000010 loss: 2.7844 (2.7136) time: 0.5249 data: 0.0002 max mem: 29510
Epoch: [1] [1190/1251] eta: 0:00:32 lr: 0.000010 loss: 2.6976 (2.7126) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [1] [1200/1251] eta: 0:00:26 lr: 0.000010 loss: 2.7392 (2.7126) time: 0.5221 data: 0.0003 max mem: 29510
Epoch: [1] [1210/1251] eta: 0:00:21 lr: 0.000010 loss: 2.7392 (2.7122) time: 0.5207 data: 0.0003 max mem: 29510
Epoch: [1] [1220/1251] eta: 0:00:16 lr: 0.000010 loss: 2.7397 (2.7135) time: 0.5223 data: 0.0002 max mem: 29510
Epoch: [1] [1230/1251] eta: 0:00:11 lr: 0.000010 loss: 2.8806 (2.7148) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [1] [1240/1251] eta: 0:00:05 lr: 0.000010 loss: 2.8720 (2.7145) time: 0.5071 data: 0.0008 max mem: 29510
Epoch: [1] [1250/1251] eta: 0:00:00 lr: 0.000010 loss: 2.8443 (2.7155) time: 0.4942 data: 0.0008 max mem: 29510
Epoch: [1] Total time: 0:10:57 (0.5258 s / it)
Averaged stats: lr: 0.000010 loss: 2.8443 (2.7114)
Test: [ 0/25] eta: 0:03:19 loss: 0.7302 (0.7302) acc1: 90.8000 (90.8000) acc5: 98.8000 (98.8000) time: 7.9729 data: 7.6830 max mem: 29510
Test: [10/25] eta: 0:00:15 loss: 0.9027 (0.8692) acc1: 86.4000 (86.4000) acc5: 98.0000 (97.9273) time: 1.0240 data: 0.6987 max mem: 29510
Test: [20/25] eta: 0:00:03 loss: 0.9823 (0.9887) acc1: 80.8000 (83.3143) acc5: 96.4000 (96.8000) time: 0.3129 data: 0.0002 max mem: 29510
Test: [24/25] eta: 0:00:00 loss: 1.0455 (0.9992) acc1: 80.4000 (82.9120) acc5: 96.4000 (96.7840) time: 0.3032 data: 0.0001 max mem: 29510
Test: Total time: 0:00:15 (0.6258 s / it)
* Acc@1 83.294 Acc@5 96.756 loss 0.991
Accuracy of the network on the 50000 test images: 83.3%
Max accuracy: 83.29%
Epoch: [2] [ 0/1251] eta: 1:20:29 lr: 0.000010 loss: 3.1677 (3.1677) time: 3.8609 data: 2.9608 max mem: 29510
Epoch: [2] [ 10/1251] eta: 0:17:59 lr: 0.000010 loss: 2.8287 (2.7721) time: 0.8701 data: 0.2916 max mem: 29510
Epoch: [2] [ 20/1251] eta: 0:14:31 lr: 0.000010 loss: 2.8331 (2.8507) time: 0.5501 data: 0.0124 max mem: 29510
Epoch: [2] [ 30/1251] eta: 0:13:14 lr: 0.000010 loss: 2.9095 (2.8616) time: 0.5301 data: 0.0002 max mem: 29510
Epoch: [2] [ 40/1251] eta: 0:12:32 lr: 0.000010 loss: 2.9006 (2.8321) time: 0.5307 data: 0.0002 max mem: 29510
Epoch: [2] [ 50/1251] eta: 0:12:02 lr: 0.000010 loss: 2.8187 (2.7850) time: 0.5264 data: 0.0002 max mem: 29510
Epoch: [2] [ 60/1251] eta: 0:11:40 lr: 0.000010 loss: 2.7904 (2.7791) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [2] [ 70/1251] eta: 0:11:24 lr: 0.000010 loss: 2.7576 (2.7644) time: 0.5232 data: 0.0002 max mem: 29510
Epoch: [2] [ 80/1251] eta: 0:11:11 lr: 0.000010 loss: 2.6431 (2.7510) time: 0.5283 data: 0.0002 max mem: 29510
Epoch: [2] [ 90/1251] eta: 0:10:59 lr: 0.000010 loss: 2.6102 (2.7192) time: 0.5269 data: 0.0002 max mem: 29510
Epoch: [2] [ 100/1251] eta: 0:10:49 lr: 0.000010 loss: 2.7913 (2.7223) time: 0.5283 data: 0.0002 max mem: 29510
Epoch: [2] [ 110/1251] eta: 0:10:39 lr: 0.000010 loss: 2.9012 (2.7272) time: 0.5271 data: 0.0003 max mem: 29510
Epoch: [2] [ 120/1251] eta: 0:10:30 lr: 0.000010 loss: 2.9140 (2.7360) time: 0.5220 data: 0.0003 max mem: 29510
Epoch: [2] [ 130/1251] eta: 0:10:21 lr: 0.000010 loss: 2.8240 (2.7279) time: 0.5168 data: 0.0002 max mem: 29510
Epoch: [2] [ 140/1251] eta: 0:10:13 lr: 0.000010 loss: 2.8172 (2.7341) time: 0.5188 data: 0.0002 max mem: 29510
Epoch: [2] [ 150/1251] eta: 0:10:05 lr: 0.000010 loss: 2.8110 (2.7344) time: 0.5258 data: 0.0002 max mem: 29510
Epoch: [2] [ 160/1251] eta: 0:09:58 lr: 0.000010 loss: 2.8366 (2.7354) time: 0.5273 data: 0.0002 max mem: 29510
Epoch: [2] [ 170/1251] eta: 0:09:52 lr: 0.000010 loss: 2.8366 (2.7395) time: 0.5284 data: 0.0002 max mem: 29510
Epoch: [2] [ 180/1251] eta: 0:09:45 lr: 0.000010 loss: 2.7794 (2.7388) time: 0.5296 data: 0.0002 max mem: 29510
Epoch: [2] [ 190/1251] eta: 0:09:38 lr: 0.000010 loss: 2.7662 (2.7306) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [2] [ 200/1251] eta: 0:09:31 lr: 0.000010 loss: 2.7592 (2.7340) time: 0.5165 data: 0.0002 max mem: 29510
Epoch: [2] [ 210/1251] eta: 0:09:25 lr: 0.000010 loss: 2.7674 (2.7407) time: 0.5234 data: 0.0002 max mem: 29510
Epoch: [2] [ 220/1251] eta: 0:09:19 lr: 0.000010 loss: 2.7608 (2.7325) time: 0.5293 data: 0.0002 max mem: 29510
Epoch: [2] [ 230/1251] eta: 0:09:13 lr: 0.000010 loss: 2.7057 (2.7310) time: 0.5285 data: 0.0003 max mem: 29510
Epoch: [2] [ 240/1251] eta: 0:09:07 lr: 0.000010 loss: 2.8256 (2.7332) time: 0.5266 data: 0.0002 max mem: 29510
Epoch: [2] [ 250/1251] eta: 0:09:01 lr: 0.000010 loss: 2.8909 (2.7355) time: 0.5267 data: 0.0003 max mem: 29510
Epoch: [2] [ 260/1251] eta: 0:08:55 lr: 0.000010 loss: 2.8024 (2.7375) time: 0.5269 data: 0.0003 max mem: 29510
Epoch: [2] [ 270/1251] eta: 0:08:49 lr: 0.000010 loss: 2.7405 (2.7380) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [2] [ 280/1251] eta: 0:08:43 lr: 0.000010 loss: 2.8701 (2.7414) time: 0.5223 data: 0.0002 max mem: 29510
Epoch: [2] [ 290/1251] eta: 0:08:37 lr: 0.000010 loss: 2.9194 (2.7427) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [2] [ 300/1251] eta: 0:08:31 lr: 0.000010 loss: 2.7789 (2.7383) time: 0.5272 data: 0.0002 max mem: 29510
Epoch: [2] [ 310/1251] eta: 0:08:25 lr: 0.000010 loss: 2.8043 (2.7419) time: 0.5240 data: 0.0002 max mem: 29510
Epoch: [2] [ 320/1251] eta: 0:08:19 lr: 0.000010 loss: 2.6941 (2.7385) time: 0.5224 data: 0.0003 max mem: 29510
Epoch: [2] [ 330/1251] eta: 0:08:14 lr: 0.000010 loss: 2.6560 (2.7329) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [2] [ 340/1251] eta: 0:08:08 lr: 0.000010 loss: 2.6090 (2.7274) time: 0.5280 data: 0.0002 max mem: 29510
Epoch: [2] [ 350/1251] eta: 0:08:03 lr: 0.000010 loss: 2.6942 (2.7271) time: 0.5294 data: 0.0002 max mem: 29510
Epoch: [2] [ 360/1251] eta: 0:07:57 lr: 0.000010 loss: 2.7854 (2.7294) time: 0.5190 data: 0.0002 max mem: 29510
Epoch: [2] [ 370/1251] eta: 0:07:51 lr: 0.000010 loss: 3.0127 (2.7329) time: 0.5155 data: 0.0002 max mem: 29510
Epoch: [2] [ 380/1251] eta: 0:07:45 lr: 0.000010 loss: 2.8155 (2.7305) time: 0.5228 data: 0.0018 max mem: 29510
Epoch: [2] [ 390/1251] eta: 0:07:40 lr: 0.000010 loss: 2.7237 (2.7258) time: 0.5262 data: 0.0018 max mem: 29510
Epoch: [2] [ 400/1251] eta: 0:07:34 lr: 0.000010 loss: 2.7746 (2.7296) time: 0.5254 data: 0.0002 max mem: 29510
Epoch: [2] [ 410/1251] eta: 0:07:29 lr: 0.000010 loss: 2.7746 (2.7280) time: 0.5254 data: 0.0002 max mem: 29510
Epoch: [2] [ 420/1251] eta: 0:07:23 lr: 0.000010 loss: 2.6812 (2.7271) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [2] [ 430/1251] eta: 0:07:18 lr: 0.000010 loss: 2.6849 (2.7279) time: 0.5195 data: 0.0002 max mem: 29510
Epoch: [2] [ 440/1251] eta: 0:07:12 lr: 0.000010 loss: 2.7866 (2.7292) time: 0.5216 data: 0.0002 max mem: 29510
Epoch: [2] [ 450/1251] eta: 0:07:06 lr: 0.000010 loss: 2.8407 (2.7311) time: 0.5218 data: 0.0002 max mem: 29510
Epoch: [2] [ 460/1251] eta: 0:07:01 lr: 0.000010 loss: 2.9675 (2.7311) time: 0.5251 data: 0.0002 max mem: 29510
Epoch: [2] [ 470/1251] eta: 0:06:56 lr: 0.000010 loss: 2.9282 (2.7309) time: 0.5260 data: 0.0002 max mem: 29510
Epoch: [2] [ 480/1251] eta: 0:06:50 lr: 0.000010 loss: 2.6788 (2.7273) time: 0.5242 data: 0.0002 max mem: 29510
Epoch: [2] [ 490/1251] eta: 0:06:45 lr: 0.000010 loss: 2.6788 (2.7265) time: 0.5232 data: 0.0003 max mem: 29510
Epoch: [2] [ 500/1251] eta: 0:06:39 lr: 0.000010 loss: 2.7304 (2.7228) time: 0.5240 data: 0.0002 max mem: 29510
Epoch: [2] [ 510/1251] eta: 0:06:34 lr: 0.000010 loss: 2.5600 (2.7180) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [2] [ 520/1251] eta: 0:06:28 lr: 0.000010 loss: 2.5600 (2.7157) time: 0.5241 data: 0.0002 max mem: 29510
Epoch: [2] [ 530/1251] eta: 0:06:23 lr: 0.000010 loss: 2.7888 (2.7156) time: 0.5227 data: 0.0002 max mem: 29510
Epoch: [2] [ 540/1251] eta: 0:06:17 lr: 0.000010 loss: 2.6925 (2.7117) time: 0.5192 data: 0.0003 max mem: 29510
Epoch: [2] [ 550/1251] eta: 0:06:12 lr: 0.000010 loss: 2.7388 (2.7140) time: 0.5206 data: 0.0003 max mem: 29510
Epoch: [2] [ 560/1251] eta: 0:06:07 lr: 0.000010 loss: 2.8252 (2.7122) time: 0.5273 data: 0.0003 max mem: 29510
Epoch: [2] [ 570/1251] eta: 0:06:01 lr: 0.000010 loss: 2.7558 (2.7129) time: 0.5259 data: 0.0002 max mem: 29510
Epoch: [2] [ 580/1251] eta: 0:05:56 lr: 0.000010 loss: 2.7990 (2.7131) time: 0.5209 data: 0.0002 max mem: 29510
Epoch: [2] [ 590/1251] eta: 0:05:50 lr: 0.000010 loss: 2.6637 (2.7120) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [2] [ 600/1251] eta: 0:05:45 lr: 0.000010 loss: 2.7281 (2.7121) time: 0.5182 data: 0.0002 max mem: 29510
Epoch: [2] [ 610/1251] eta: 0:05:39 lr: 0.000010 loss: 2.8532 (2.7097) time: 0.5149 data: 0.0002 max mem: 29510
Epoch: [2] [ 620/1251] eta: 0:05:34 lr: 0.000010 loss: 2.8438 (2.7084) time: 0.5222 data: 0.0002 max mem: 29510
Epoch: [2] [ 630/1251] eta: 0:05:29 lr: 0.000010 loss: 2.8438 (2.7091) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [2] [ 640/1251] eta: 0:05:23 lr: 0.000010 loss: 2.8063 (2.7090) time: 0.5222 data: 0.0002 max mem: 29510
Epoch: [2] [ 650/1251] eta: 0:05:18 lr: 0.000010 loss: 2.7160 (2.7063) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [2] [ 660/1251] eta: 0:05:13 lr: 0.000010 loss: 2.7160 (2.7053) time: 0.5227 data: 0.0003 max mem: 29510
Epoch: [2] [ 670/1251] eta: 0:05:07 lr: 0.000010 loss: 2.7965 (2.7031) time: 0.5225 data: 0.0002 max mem: 29510
Epoch: [2] [ 680/1251] eta: 0:05:02 lr: 0.000010 loss: 2.8171 (2.7032) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [2] [ 690/1251] eta: 0:04:57 lr: 0.000010 loss: 2.6881 (2.7005) time: 0.5254 data: 0.0002 max mem: 29510
Epoch: [2] [ 700/1251] eta: 0:04:51 lr: 0.000010 loss: 2.7766 (2.7023) time: 0.5220 data: 0.0002 max mem: 29510
Epoch: [2] [ 710/1251] eta: 0:04:46 lr: 0.000010 loss: 2.8120 (2.7041) time: 0.5196 data: 0.0002 max mem: 29510
Epoch: [2] [ 720/1251] eta: 0:04:40 lr: 0.000010 loss: 2.6952 (2.7035) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [2] [ 730/1251] eta: 0:04:35 lr: 0.000010 loss: 2.5466 (2.7040) time: 0.5186 data: 0.0002 max mem: 29510
Epoch: [2] [ 740/1251] eta: 0:04:30 lr: 0.000010 loss: 2.6594 (2.7008) time: 0.5160 data: 0.0002 max mem: 29510
Epoch: [2] [ 750/1251] eta: 0:04:24 lr: 0.000010 loss: 2.6532 (2.7016) time: 0.5186 data: 0.0002 max mem: 29510
Epoch: [2] [ 760/1251] eta: 0:04:19 lr: 0.000010 loss: 2.6479 (2.7000) time: 0.5197 data: 0.0002 max mem: 29510
Epoch: [2] [ 770/1251] eta: 0:04:14 lr: 0.000010 loss: 2.7579 (2.6999) time: 0.5204 data: 0.0002 max mem: 29510
Epoch: [2] [ 780/1251] eta: 0:04:08 lr: 0.000010 loss: 2.8219 (2.6994) time: 0.5177 data: 0.0003 max mem: 29510
Epoch: [2] [ 790/1251] eta: 0:04:03 lr: 0.000010 loss: 2.8462 (2.6980) time: 0.5110 data: 0.0002 max mem: 29510
Epoch: [2] [ 800/1251] eta: 0:03:58 lr: 0.000010 loss: 2.8124 (2.6979) time: 0.5154 data: 0.0002 max mem: 29510
Epoch: [2] [ 810/1251] eta: 0:03:52 lr: 0.000010 loss: 2.7411 (2.6984) time: 0.5187 data: 0.0002 max mem: 29510
Epoch: [2] [ 820/1251] eta: 0:03:47 lr: 0.000010 loss: 2.8292 (2.6999) time: 0.5188 data: 0.0003 max mem: 29510
Epoch: [2] [ 830/1251] eta: 0:03:42 lr: 0.000010 loss: 2.9073 (2.7017) time: 0.5175 data: 0.0003 max mem: 29510
Epoch: [2] [ 840/1251] eta: 0:03:36 lr: 0.000010 loss: 2.8772 (2.7021) time: 0.5185 data: 0.0002 max mem: 29510
Epoch: [2] [ 850/1251] eta: 0:03:31 lr: 0.000010 loss: 2.8772 (2.7040) time: 0.5172 data: 0.0002 max mem: 29510
Epoch: [2] [ 860/1251] eta: 0:03:26 lr: 0.000010 loss: 2.7497 (2.7009) time: 0.5145 data: 0.0002 max mem: 29510
Epoch: [2] [ 870/1251] eta: 0:03:20 lr: 0.000010 loss: 2.7497 (2.7015) time: 0.5186 data: 0.0002 max mem: 29510
Epoch: [2] [ 880/1251] eta: 0:03:15 lr: 0.000010 loss: 3.0098 (2.7025) time: 0.5227 data: 0.0002 max mem: 29510
Epoch: [2] [ 890/1251] eta: 0:03:10 lr: 0.000010 loss: 2.7169 (2.7010) time: 0.5234 data: 0.0002 max mem: 29510
Epoch: [2] [ 900/1251] eta: 0:03:04 lr: 0.000010 loss: 2.4858 (2.6984) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [2] [ 910/1251] eta: 0:02:59 lr: 0.000010 loss: 2.5641 (2.6978) time: 0.5218 data: 0.0002 max mem: 29510
Epoch: [2] [ 920/1251] eta: 0:02:54 lr: 0.000010 loss: 2.8768 (2.6981) time: 0.5205 data: 0.0002 max mem: 29510
Epoch: [2] [ 930/1251] eta: 0:02:49 lr: 0.000010 loss: 2.8746 (2.6970) time: 0.5154 data: 0.0002 max mem: 29510
Epoch: [2] [ 940/1251] eta: 0:02:43 lr: 0.000010 loss: 2.8746 (2.6984) time: 0.5179 data: 0.0002 max mem: 29510
Epoch: [2] [ 950/1251] eta: 0:02:38 lr: 0.000010 loss: 2.8362 (2.6992) time: 0.5192 data: 0.0002 max mem: 29510
Epoch: [2] [ 960/1251] eta: 0:02:33 lr: 0.000010 loss: 2.7220 (2.6993) time: 0.5174 data: 0.0002 max mem: 29510
Epoch: [2] [ 970/1251] eta: 0:02:27 lr: 0.000010 loss: 2.7220 (2.6981) time: 0.5190 data: 0.0002 max mem: 29510
Epoch: [2] [ 980/1251] eta: 0:02:22 lr: 0.000010 loss: 2.8057 (2.6971) time: 0.5215 data: 0.0002 max mem: 29510
Epoch: [2] [ 990/1251] eta: 0:02:17 lr: 0.000010 loss: 2.9063 (2.6981) time: 0.5189 data: 0.0002 max mem: 29510
Epoch: [2] [1000/1251] eta: 0:02:12 lr: 0.000010 loss: 2.8916 (2.6991) time: 0.5182 data: 0.0002 max mem: 29510
Epoch: [2] [1010/1251] eta: 0:02:06 lr: 0.000010 loss: 2.8784 (2.6977) time: 0.5204 data: 0.0002 max mem: 29510
Epoch: [2] [1020/1251] eta: 0:02:01 lr: 0.000010 loss: 2.8421 (2.6979) time: 0.5195 data: 0.0002 max mem: 29510
Epoch: [2] [1030/1251] eta: 0:01:56 lr: 0.000010 loss: 2.8208 (2.6982) time: 0.5218 data: 0.0002 max mem: 29510
Epoch: [2] [1040/1251] eta: 0:01:50 lr: 0.000010 loss: 2.7964 (2.6983) time: 0.5219 data: 0.0002 max mem: 29510
Epoch: [2] [1050/1251] eta: 0:01:45 lr: 0.000010 loss: 2.7761 (2.6983) time: 0.5181 data: 0.0002 max mem: 29510
Epoch: [2] [1060/1251] eta: 0:01:40 lr: 0.000010 loss: 2.7870 (2.6992) time: 0.5171 data: 0.0003 max mem: 29510
Epoch: [2] [1070/1251] eta: 0:01:35 lr: 0.000010 loss: 2.9247 (2.6994) time: 0.5210 data: 0.0003 max mem: 29510
Epoch: [2] [1080/1251] eta: 0:01:29 lr: 0.000010 loss: 2.6780 (2.6988) time: 0.5228 data: 0.0003 max mem: 29510
Epoch: [2] [1090/1251] eta: 0:01:24 lr: 0.000010 loss: 2.8197 (2.7006) time: 0.5211 data: 0.0002 max mem: 29510
Epoch: [2] [1100/1251] eta: 0:01:19 lr: 0.000010 loss: 2.7285 (2.6998) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [2] [1110/1251] eta: 0:01:14 lr: 0.000010 loss: 2.7285 (2.7013) time: 0.5199 data: 0.0002 max mem: 29510
Epoch: [2] [1120/1251] eta: 0:01:08 lr: 0.000010 loss: 2.8552 (2.7019) time: 0.5196 data: 0.0002 max mem: 29510
Epoch: [2] [1130/1251] eta: 0:01:03 lr: 0.000010 loss: 2.7326 (2.7013) time: 0.5230 data: 0.0002 max mem: 29510
Epoch: [2] [1140/1251] eta: 0:00:58 lr: 0.000010 loss: 2.7326 (2.7017) time: 0.5219 data: 0.0002 max mem: 29510
Epoch: [2] [1150/1251] eta: 0:00:53 lr: 0.000010 loss: 2.8902 (2.7041) time: 0.5206 data: 0.0002 max mem: 29510
Epoch: [2] [1160/1251] eta: 0:00:47 lr: 0.000010 loss: 2.9812 (2.7034) time: 0.5227 data: 0.0002 max mem: 29510
Epoch: [2] [1170/1251] eta: 0:00:42 lr: 0.000010 loss: 2.7338 (2.7031) time: 0.5240 data: 0.0003 max mem: 29510
Epoch: [2] [1180/1251] eta: 0:00:37 lr: 0.000010 loss: 2.8801 (2.7043) time: 0.5215 data: 0.0003 max mem: 29510
Epoch: [2] [1190/1251] eta: 0:00:32 lr: 0.000010 loss: 2.8801 (2.7049) time: 0.5220 data: 0.0002 max mem: 29510
Epoch: [2] [1200/1251] eta: 0:00:26 lr: 0.000010 loss: 2.9027 (2.7063) time: 0.5270 data: 0.0002 max mem: 29510
Epoch: [2] [1210/1251] eta: 0:00:21 lr: 0.000010 loss: 2.9103 (2.7072) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [2] [1220/1251] eta: 0:00:16 lr: 0.000010 loss: 2.9239 (2.7076) time: 0.5217 data: 0.0002 max mem: 29510
Epoch: [2] [1230/1251] eta: 0:00:11 lr: 0.000010 loss: 2.8948 (2.7085) time: 0.5259 data: 0.0002 max mem: 29510
Epoch: [2] [1240/1251] eta: 0:00:05 lr: 0.000010 loss: 2.8372 (2.7084) time: 0.5153 data: 0.0009 max mem: 29510
Epoch: [2] [1250/1251] eta: 0:00:00 lr: 0.000010 loss: 2.8372 (2.7081) time: 0.4958 data: 0.0008 max mem: 29510
Epoch: [2] Total time: 0:10:57 (0.5253 s / it)
Averaged stats: lr: 0.000010 loss: 2.8372 (2.6968)
Test: [ 0/25] eta: 0:02:59 loss: 0.6569 (0.6569) acc1: 90.4000 (90.4000) acc5: 98.8000 (98.8000) time: 7.1635 data: 6.8732 max mem: 29510
Test: [10/25] eta: 0:00:14 loss: 0.8138 (0.7845) acc1: 86.8000 (86.8727) acc5: 98.0000 (97.8909) time: 0.9355 data: 0.6251 max mem: 29510
Test: [20/25] eta: 0:00:03 loss: 0.8955 (0.9068) acc1: 81.6000 (83.2952) acc5: 96.4000 (96.7429) time: 0.3117 data: 0.0002 max mem: 29510
Test: [24/25] eta: 0:00:00 loss: 0.9535 (0.9162) acc1: 80.0000 (82.8960) acc5: 96.4000 (96.7040) time: 0.3075 data: 0.0002 max mem: 29510
Test: Total time: 0:00:14 (0.5922 s / it)
* Acc@1 83.376 Acc@5 96.712 loss 0.909
Accuracy of the network on the 50000 test images: 83.4%
Max accuracy: 83.38%
Epoch: [3] [ 0/1251] eta: 1:41:43 lr: 0.000010 loss: 1.9947 (1.9947) time: 4.8792 data: 4.3570 max mem: 29510
Epoch: [3] [ 10/1251] eta: 0:19:23 lr: 0.000010 loss: 2.7887 (2.6597) time: 0.9376 data: 0.3963 max mem: 29510
Epoch: [3] [ 20/1251] eta: 0:15:12 lr: 0.000010 loss: 2.8604 (2.7201) time: 0.5345 data: 0.0002 max mem: 29510
Epoch: [3] [ 30/1251] eta: 0:13:43 lr: 0.000010 loss: 2.7411 (2.6854) time: 0.5293 data: 0.0002 max mem: 29510
Epoch: [3] [ 40/1251] eta: 0:12:50 lr: 0.000010 loss: 2.6677 (2.6853) time: 0.5260 data: 0.0002 max mem: 29510
Epoch: [3] [ 50/1251] eta: 0:12:16 lr: 0.000010 loss: 2.5431 (2.6436) time: 0.5198 data: 0.0002 max mem: 29510
Epoch: [3] [ 60/1251] eta: 0:11:50 lr: 0.000010 loss: 2.6548 (2.6392) time: 0.5163 data: 0.0002 max mem: 29510
Epoch: [3] [ 70/1251] eta: 0:11:34 lr: 0.000010 loss: 2.7262 (2.6379) time: 0.5220 data: 0.0002 max mem: 29510
Epoch: [3] [ 80/1251] eta: 0:11:18 lr: 0.000010 loss: 2.7336 (2.6542) time: 0.5267 data: 0.0002 max mem: 29510
Epoch: [3] [ 90/1251] eta: 0:11:07 lr: 0.000010 loss: 2.7176 (2.6514) time: 0.5282 data: 0.0002 max mem: 29510
Epoch: [3] [ 100/1251] eta: 0:10:55 lr: 0.000010 loss: 2.7042 (2.6374) time: 0.5301 data: 0.0002 max mem: 29510
Epoch: [3] [ 110/1251] eta: 0:10:46 lr: 0.000010 loss: 2.6409 (2.6348) time: 0.5284 data: 0.0002 max mem: 29510
Epoch: [3] [ 120/1251] eta: 0:10:37 lr: 0.000010 loss: 2.7046 (2.6260) time: 0.5316 data: 0.0002 max mem: 29510
Epoch: [3] [ 130/1251] eta: 0:10:28 lr: 0.000010 loss: 2.6303 (2.6252) time: 0.5299 data: 0.0002 max mem: 29510
Epoch: [3] [ 140/1251] eta: 0:10:20 lr: 0.000010 loss: 2.6840 (2.6287) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [3] [ 150/1251] eta: 0:10:12 lr: 0.000010 loss: 2.6840 (2.6248) time: 0.5271 data: 0.0002 max mem: 29510
Epoch: [3] [ 160/1251] eta: 0:10:05 lr: 0.000010 loss: 2.8058 (2.6401) time: 0.5327 data: 0.0002 max mem: 29510
Epoch: [3] [ 170/1251] eta: 0:09:58 lr: 0.000010 loss: 2.8906 (2.6504) time: 0.5353 data: 0.0002 max mem: 29510
Epoch: [3] [ 180/1251] eta: 0:09:52 lr: 0.000010 loss: 2.8132 (2.6532) time: 0.5342 data: 0.0002 max mem: 29510
Epoch: [3] [ 190/1251] eta: 0:09:44 lr: 0.000010 loss: 2.8000 (2.6547) time: 0.5284 data: 0.0002 max mem: 29510
Epoch: [3] [ 200/1251] eta: 0:09:38 lr: 0.000010 loss: 2.7306 (2.6613) time: 0.5238 data: 0.0003 max mem: 29510
Epoch: [3] [ 210/1251] eta: 0:09:31 lr: 0.000010 loss: 2.6104 (2.6525) time: 0.5277 data: 0.0002 max mem: 29510
Epoch: [3] [ 220/1251] eta: 0:09:25 lr: 0.000010 loss: 2.5815 (2.6534) time: 0.5285 data: 0.0002 max mem: 29510
Epoch: [3] [ 230/1251] eta: 0:09:18 lr: 0.000010 loss: 2.7140 (2.6476) time: 0.5293 data: 0.0002 max mem: 29510
Epoch: [3] [ 240/1251] eta: 0:09:12 lr: 0.000010 loss: 2.5580 (2.6465) time: 0.5267 data: 0.0002 max mem: 29510
Epoch: [3] [ 250/1251] eta: 0:09:05 lr: 0.000010 loss: 2.5814 (2.6422) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [3] [ 260/1251] eta: 0:09:00 lr: 0.000010 loss: 2.7061 (2.6473) time: 0.5287 data: 0.0002 max mem: 29510
Epoch: [3] [ 270/1251] eta: 0:08:53 lr: 0.000010 loss: 2.9018 (2.6506) time: 0.5296 data: 0.0002 max mem: 29510
Epoch: [3] [ 280/1251] eta: 0:08:48 lr: 0.000010 loss: 2.7777 (2.6507) time: 0.5299 data: 0.0002 max mem: 29510
Epoch: [3] [ 290/1251] eta: 0:08:42 lr: 0.000010 loss: 2.5884 (2.6440) time: 0.5345 data: 0.0002 max mem: 29510
Epoch: [3] [ 300/1251] eta: 0:08:36 lr: 0.000010 loss: 2.6592 (2.6458) time: 0.5307 data: 0.0002 max mem: 29510
Epoch: [3] [ 310/1251] eta: 0:08:30 lr: 0.000010 loss: 2.6595 (2.6442) time: 0.5254 data: 0.0002 max mem: 29510
Epoch: [3] [ 320/1251] eta: 0:08:24 lr: 0.000010 loss: 2.8563 (2.6520) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [3] [ 330/1251] eta: 0:08:18 lr: 0.000010 loss: 2.9767 (2.6586) time: 0.5271 data: 0.0002 max mem: 29510
Epoch: [3] [ 340/1251] eta: 0:08:13 lr: 0.000010 loss: 2.7874 (2.6591) time: 0.5307 data: 0.0003 max mem: 29510
Epoch: [3] [ 350/1251] eta: 0:08:07 lr: 0.000010 loss: 2.6810 (2.6620) time: 0.5294 data: 0.0002 max mem: 29510
Epoch: [3] [ 360/1251] eta: 0:08:01 lr: 0.000010 loss: 2.8577 (2.6669) time: 0.5340 data: 0.0002 max mem: 29510
Epoch: [3] [ 370/1251] eta: 0:07:56 lr: 0.000010 loss: 2.9389 (2.6734) time: 0.5373 data: 0.0002 max mem: 29510
Epoch: [3] [ 380/1251] eta: 0:07:50 lr: 0.000010 loss: 2.9229 (2.6722) time: 0.5263 data: 0.0002 max mem: 29510
Epoch: [3] [ 390/1251] eta: 0:07:44 lr: 0.000010 loss: 2.9402 (2.6810) time: 0.5266 data: 0.0003 max mem: 29510
Epoch: [3] [ 400/1251] eta: 0:07:39 lr: 0.000010 loss: 2.9387 (2.6783) time: 0.5322 data: 0.0002 max mem: 29510
Epoch: [3] [ 410/1251] eta: 0:07:33 lr: 0.000010 loss: 2.8127 (2.6815) time: 0.5311 data: 0.0002 max mem: 29510
Epoch: [3] [ 420/1251] eta: 0:07:28 lr: 0.000010 loss: 2.8342 (2.6819) time: 0.5292 data: 0.0002 max mem: 29510
Epoch: [3] [ 430/1251] eta: 0:07:22 lr: 0.000010 loss: 2.7397 (2.6821) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [3] [ 440/1251] eta: 0:07:16 lr: 0.000010 loss: 2.6545 (2.6806) time: 0.5199 data: 0.0002 max mem: 29510
Epoch: [3] [ 450/1251] eta: 0:07:11 lr: 0.000010 loss: 2.6798 (2.6788) time: 0.5237 data: 0.0002 max mem: 29510
Epoch: [3] [ 460/1251] eta: 0:07:05 lr: 0.000010 loss: 2.8168 (2.6802) time: 0.5304 data: 0.0002 max mem: 29510
Epoch: [3] [ 470/1251] eta: 0:06:59 lr: 0.000010 loss: 2.6870 (2.6772) time: 0.5258 data: 0.0002 max mem: 29510
Epoch: [3] [ 480/1251] eta: 0:06:54 lr: 0.000010 loss: 2.6870 (2.6785) time: 0.5279 data: 0.0002 max mem: 29510
Epoch: [3] [ 490/1251] eta: 0:06:48 lr: 0.000010 loss: 2.8033 (2.6788) time: 0.5295 data: 0.0002 max mem: 29510
Epoch: [3] [ 500/1251] eta: 0:06:43 lr: 0.000010 loss: 2.8828 (2.6828) time: 0.5234 data: 0.0002 max mem: 29510
Epoch: [3] [ 510/1251] eta: 0:06:37 lr: 0.000010 loss: 2.8606 (2.6812) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [3] [ 520/1251] eta: 0:06:32 lr: 0.000010 loss: 2.3647 (2.6766) time: 0.5273 data: 0.0002 max mem: 29510
Epoch: [3] [ 530/1251] eta: 0:06:26 lr: 0.000010 loss: 2.6620 (2.6775) time: 0.5283 data: 0.0002 max mem: 29510
Epoch: [3] [ 540/1251] eta: 0:06:21 lr: 0.000010 loss: 2.6620 (2.6766) time: 0.5260 data: 0.0002 max mem: 29510
Epoch: [3] [ 550/1251] eta: 0:06:15 lr: 0.000010 loss: 2.6100 (2.6742) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [3] [ 560/1251] eta: 0:06:10 lr: 0.000010 loss: 2.8276 (2.6751) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [3] [ 570/1251] eta: 0:06:04 lr: 0.000010 loss: 2.8276 (2.6754) time: 0.5302 data: 0.0002 max mem: 29510
Epoch: [3] [ 580/1251] eta: 0:05:59 lr: 0.000010 loss: 2.8217 (2.6761) time: 0.5311 data: 0.0002 max mem: 29510
Epoch: [3] [ 590/1251] eta: 0:05:53 lr: 0.000010 loss: 2.6046 (2.6735) time: 0.5207 data: 0.0002 max mem: 29510
Epoch: [3] [ 600/1251] eta: 0:05:48 lr: 0.000010 loss: 2.7377 (2.6750) time: 0.5200 data: 0.0003 max mem: 29510
Epoch: [3] [ 610/1251] eta: 0:05:42 lr: 0.000010 loss: 2.7976 (2.6722) time: 0.5244 data: 0.0002 max mem: 29510
Epoch: [3] [ 620/1251] eta: 0:05:37 lr: 0.000010 loss: 2.6795 (2.6716) time: 0.5278 data: 0.0002 max mem: 29510
Epoch: [3] [ 630/1251] eta: 0:05:32 lr: 0.000010 loss: 2.8225 (2.6724) time: 0.5283 data: 0.0002 max mem: 29510
Epoch: [3] [ 640/1251] eta: 0:05:26 lr: 0.000010 loss: 2.8478 (2.6747) time: 0.5258 data: 0.0002 max mem: 29510
Epoch: [3] [ 650/1251] eta: 0:05:21 lr: 0.000010 loss: 2.8435 (2.6741) time: 0.5272 data: 0.0002 max mem: 29510
Epoch: [3] [ 660/1251] eta: 0:05:15 lr: 0.000010 loss: 2.6920 (2.6739) time: 0.5293 data: 0.0002 max mem: 29510
Epoch: [3] [ 670/1251] eta: 0:05:10 lr: 0.000010 loss: 2.6811 (2.6722) time: 0.5289 data: 0.0002 max mem: 29510
Epoch: [3] [ 680/1251] eta: 0:05:04 lr: 0.000010 loss: 2.7573 (2.6710) time: 0.5232 data: 0.0002 max mem: 29510
Epoch: [3] [ 690/1251] eta: 0:04:59 lr: 0.000010 loss: 2.7732 (2.6694) time: 0.5197 data: 0.0002 max mem: 29510
Epoch: [3] [ 700/1251] eta: 0:04:54 lr: 0.000010 loss: 2.7732 (2.6714) time: 0.5206 data: 0.0002 max mem: 29510
Epoch: [3] [ 710/1251] eta: 0:04:48 lr: 0.000010 loss: 2.6278 (2.6674) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [3] [ 720/1251] eta: 0:04:43 lr: 0.000010 loss: 2.3253 (2.6656) time: 0.5232 data: 0.0002 max mem: 29510
Epoch: [3] [ 730/1251] eta: 0:04:37 lr: 0.000010 loss: 2.6686 (2.6648) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [3] [ 740/1251] eta: 0:04:32 lr: 0.000010 loss: 2.7536 (2.6655) time: 0.5275 data: 0.0003 max mem: 29510
Epoch: [3] [ 750/1251] eta: 0:04:27 lr: 0.000010 loss: 2.7770 (2.6653) time: 0.5207 data: 0.0002 max mem: 29510
Epoch: [3] [ 760/1251] eta: 0:04:21 lr: 0.000010 loss: 2.7770 (2.6658) time: 0.5191 data: 0.0002 max mem: 29510
Epoch: [3] [ 770/1251] eta: 0:04:16 lr: 0.000010 loss: 2.9562 (2.6688) time: 0.5231 data: 0.0002 max mem: 29510
Epoch: [3] [ 780/1251] eta: 0:04:10 lr: 0.000010 loss: 2.8870 (2.6708) time: 0.5273 data: 0.0002 max mem: 29510
Epoch: [3] [ 790/1251] eta: 0:04:05 lr: 0.000010 loss: 2.8247 (2.6711) time: 0.5285 data: 0.0002 max mem: 29510
Epoch: [3] [ 800/1251] eta: 0:04:00 lr: 0.000010 loss: 2.7099 (2.6704) time: 0.5255 data: 0.0002 max mem: 29510
Epoch: [3] [ 810/1251] eta: 0:03:54 lr: 0.000010 loss: 2.8331 (2.6717) time: 0.5218 data: 0.0002 max mem: 29510
Epoch: [3] [ 820/1251] eta: 0:03:49 lr: 0.000010 loss: 2.7966 (2.6713) time: 0.5196 data: 0.0002 max mem: 29510
Epoch: [3] [ 830/1251] eta: 0:03:44 lr: 0.000010 loss: 2.7071 (2.6713) time: 0.5218 data: 0.0002 max mem: 29510
Epoch: [3] [ 840/1251] eta: 0:03:38 lr: 0.000010 loss: 2.8213 (2.6729) time: 0.5252 data: 0.0002 max mem: 29510
Epoch: [3] [ 850/1251] eta: 0:03:33 lr: 0.000010 loss: 2.8216 (2.6731) time: 0.5261 data: 0.0002 max mem: 29510
Epoch: [3] [ 860/1251] eta: 0:03:27 lr: 0.000010 loss: 2.8720 (2.6740) time: 0.5246 data: 0.0002 max mem: 29510
Epoch: [3] [ 870/1251] eta: 0:03:22 lr: 0.000010 loss: 2.8995 (2.6747) time: 0.5252 data: 0.0002 max mem: 29510
Epoch: [3] [ 880/1251] eta: 0:03:17 lr: 0.000010 loss: 2.8005 (2.6736) time: 0.5259 data: 0.0002 max mem: 29510
Epoch: [3] [ 890/1251] eta: 0:03:11 lr: 0.000010 loss: 2.7792 (2.6723) time: 0.5265 data: 0.0002 max mem: 29510
Epoch: [3] [ 900/1251] eta: 0:03:06 lr: 0.000010 loss: 2.7272 (2.6716) time: 0.5285 data: 0.0002 max mem: 29510
Epoch: [3] [ 910/1251] eta: 0:03:01 lr: 0.000010 loss: 2.7952 (2.6729) time: 0.5268 data: 0.0003 max mem: 29510
Epoch: [3] [ 920/1251] eta: 0:02:55 lr: 0.000010 loss: 2.8411 (2.6736) time: 0.5221 data: 0.0003 max mem: 29510
Epoch: [3] [ 930/1251] eta: 0:02:50 lr: 0.000010 loss: 2.8753 (2.6754) time: 0.5207 data: 0.0002 max mem: 29510
Epoch: [3] [ 940/1251] eta: 0:02:45 lr: 0.000010 loss: 2.8753 (2.6766) time: 0.5222 data: 0.0002 max mem: 29510
Epoch: [3] [ 950/1251] eta: 0:02:39 lr: 0.000010 loss: 2.7550 (2.6751) time: 0.5225 data: 0.0002 max mem: 29510
Epoch: [3] [ 960/1251] eta: 0:02:34 lr: 0.000010 loss: 2.7550 (2.6767) time: 0.5220 data: 0.0002 max mem: 29510
Epoch: [3] [ 970/1251] eta: 0:02:29 lr: 0.000010 loss: 2.8577 (2.6778) time: 0.5204 data: 0.0002 max mem: 29510
Epoch: [3] [ 980/1251] eta: 0:02:23 lr: 0.000010 loss: 2.8621 (2.6787) time: 0.5225 data: 0.0002 max mem: 29510
Epoch: [3] [ 990/1251] eta: 0:02:18 lr: 0.000010 loss: 2.8621 (2.6789) time: 0.5269 data: 0.0002 max mem: 29510
Epoch: [3] [1000/1251] eta: 0:02:13 lr: 0.000010 loss: 2.7344 (2.6787) time: 0.5259 data: 0.0003 max mem: 29510
Epoch: [3] [1010/1251] eta: 0:02:07 lr: 0.000010 loss: 2.6984 (2.6780) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [3] [1020/1251] eta: 0:02:02 lr: 0.000010 loss: 2.7297 (2.6788) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [3] [1030/1251] eta: 0:01:57 lr: 0.000010 loss: 2.7490 (2.6778) time: 0.5237 data: 0.0002 max mem: 29510
Epoch: [3] [1040/1251] eta: 0:01:51 lr: 0.000010 loss: 2.6417 (2.6778) time: 0.5239 data: 0.0002 max mem: 29510
Epoch: [3] [1050/1251] eta: 0:01:46 lr: 0.000010 loss: 2.6815 (2.6785) time: 0.5217 data: 0.0002 max mem: 29510
Epoch: [3] [1060/1251] eta: 0:01:41 lr: 0.000010 loss: 2.6354 (2.6765) time: 0.5249 data: 0.0002 max mem: 29510
Epoch: [3] [1070/1251] eta: 0:01:35 lr: 0.000010 loss: 2.3964 (2.6727) time: 0.5279 data: 0.0002 max mem: 29510
Epoch: [3] [1080/1251] eta: 0:01:30 lr: 0.000010 loss: 2.4238 (2.6719) time: 0.5264 data: 0.0003 max mem: 29510
Epoch: [3] [1090/1251] eta: 0:01:25 lr: 0.000010 loss: 2.7715 (2.6729) time: 0.5244 data: 0.0002 max mem: 29510
Epoch: [3] [1100/1251] eta: 0:01:20 lr: 0.000010 loss: 2.9036 (2.6752) time: 0.5237 data: 0.0002 max mem: 29510
Epoch: [3] [1110/1251] eta: 0:01:14 lr: 0.000010 loss: 2.9186 (2.6759) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [3] [1120/1251] eta: 0:01:09 lr: 0.000010 loss: 2.8108 (2.6756) time: 0.5243 data: 0.0002 max mem: 29510
Epoch: [3] [1130/1251] eta: 0:01:04 lr: 0.000010 loss: 2.7927 (2.6746) time: 0.5256 data: 0.0002 max mem: 29510
Epoch: [3] [1140/1251] eta: 0:00:58 lr: 0.000010 loss: 2.7946 (2.6750) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [3] [1150/1251] eta: 0:00:53 lr: 0.000010 loss: 2.8406 (2.6759) time: 0.5187 data: 0.0002 max mem: 29510
Epoch: [3] [1160/1251] eta: 0:00:48 lr: 0.000010 loss: 2.7821 (2.6759) time: 0.5248 data: 0.0003 max mem: 29510
Epoch: [3] [1170/1251] eta: 0:00:42 lr: 0.000010 loss: 2.6973 (2.6738) time: 0.5254 data: 0.0002 max mem: 29510
Epoch: [3] [1180/1251] eta: 0:00:37 lr: 0.000010 loss: 2.9501 (2.6761) time: 0.5173 data: 0.0002 max mem: 29510
Epoch: [3] [1190/1251] eta: 0:00:32 lr: 0.000010 loss: 3.0100 (2.6782) time: 0.5183 data: 0.0002 max mem: 29510
Epoch: [3] [1200/1251] eta: 0:00:27 lr: 0.000010 loss: 2.9362 (2.6787) time: 0.5256 data: 0.0002 max mem: 29510
Epoch: [3] [1210/1251] eta: 0:00:21 lr: 0.000010 loss: 2.7210 (2.6781) time: 0.5226 data: 0.0002 max mem: 29510
Epoch: [3] [1220/1251] eta: 0:00:16 lr: 0.000010 loss: 2.7102 (2.6781) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [3] [1230/1251] eta: 0:00:11 lr: 0.000010 loss: 2.8093 (2.6782) time: 0.5190 data: 0.0002 max mem: 29510
Epoch: [3] [1240/1251] eta: 0:00:05 lr: 0.000010 loss: 2.7604 (2.6785) time: 0.5060 data: 0.0009 max mem: 29510
Epoch: [3] [1250/1251] eta: 0:00:00 lr: 0.000010 loss: 2.8010 (2.6796) time: 0.4937 data: 0.0008 max mem: 29510
Epoch: [3] Total time: 0:11:02 (0.5292 s / it)
Averaged stats: lr: 0.000010 loss: 2.8010 (2.6979)
Test: [ 0/25] eta: 0:02:20 loss: 0.7115 (0.7115) acc1: 90.8000 (90.8000) acc5: 98.8000 (98.8000) time: 5.6346 data: 5.3431 max mem: 29510
Test: [10/25] eta: 0:00:15 loss: 0.8641 (0.8430) acc1: 86.4000 (86.4727) acc5: 98.0000 (98.0364) time: 1.0070 data: 0.6517 max mem: 29510
Test: [20/25] eta: 0:00:03 loss: 0.9588 (0.9654) acc1: 81.2000 (83.1810) acc5: 96.8000 (96.8191) time: 0.4463 data: 0.1120 max mem: 29510
Test: [24/25] eta: 0:00:00 loss: 1.0378 (0.9766) acc1: 80.0000 (82.7680) acc5: 96.4000 (96.8160) time: 0.3471 data: 0.0208 max mem: 29510
Test: Total time: 0:00:15 (0.6378 s / it)
* Acc@1 83.350 Acc@5 96.756 loss 0.972
Accuracy of the network on the 50000 test images: 83.4%
Max accuracy: 83.38%
Epoch: [4] [ 0/1251] eta: 1:32:05 lr: 0.000010 loss: 2.9486 (2.9486) time: 4.4170 data: 3.0424 max mem: 29510
Epoch: [4] [ 10/1251] eta: 0:18:32 lr: 0.000010 loss: 2.7728 (2.7620) time: 0.8962 data: 0.2768 max mem: 29510
Epoch: [4] [ 20/1251] eta: 0:14:50 lr: 0.000010 loss: 2.7728 (2.7912) time: 0.5383 data: 0.0003 max mem: 29510
Epoch: [4] [ 30/1251] eta: 0:13:28 lr: 0.000010 loss: 2.7107 (2.7129) time: 0.5328 data: 0.0002 max mem: 29510
Epoch: [4] [ 40/1251] eta: 0:12:43 lr: 0.000010 loss: 2.7456 (2.7409) time: 0.5328 data: 0.0002 max mem: 29510
Epoch: [4] [ 50/1251] eta: 0:12:11 lr: 0.000010 loss: 2.9573 (2.7609) time: 0.5283 data: 0.0003 max mem: 29510
Epoch: [4] [ 60/1251] eta: 0:11:50 lr: 0.000010 loss: 2.7612 (2.7363) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [4] [ 70/1251] eta: 0:11:32 lr: 0.000010 loss: 2.6158 (2.7234) time: 0.5278 data: 0.0002 max mem: 29510
Epoch: [4] [ 80/1251] eta: 0:11:18 lr: 0.000010 loss: 2.7926 (2.7293) time: 0.5271 data: 0.0002 max mem: 29510
Epoch: [4] [ 90/1251] eta: 0:11:05 lr: 0.000010 loss: 2.8304 (2.7369) time: 0.5278 data: 0.0002 max mem: 29510
Epoch: [4] [ 100/1251] eta: 0:10:55 lr: 0.000010 loss: 2.7869 (2.7245) time: 0.5280 data: 0.0002 max mem: 29510
Epoch: [4] [ 110/1251] eta: 0:10:44 lr: 0.000010 loss: 2.6566 (2.7134) time: 0.5255 data: 0.0002 max mem: 29510
Epoch: [4] [ 120/1251] eta: 0:10:35 lr: 0.000010 loss: 2.5006 (2.6869) time: 0.5230 data: 0.0002 max mem: 29510
Epoch: [4] [ 130/1251] eta: 0:10:26 lr: 0.000010 loss: 2.5221 (2.6850) time: 0.5237 data: 0.0003 max mem: 29510
Epoch: [4] [ 140/1251] eta: 0:10:18 lr: 0.000010 loss: 2.9637 (2.6887) time: 0.5258 data: 0.0003 max mem: 29510
Epoch: [4] [ 150/1251] eta: 0:10:10 lr: 0.000010 loss: 2.8591 (2.6802) time: 0.5284 data: 0.0002 max mem: 29510
Epoch: [4] [ 160/1251] eta: 0:10:03 lr: 0.000010 loss: 2.5903 (2.6746) time: 0.5291 data: 0.0002 max mem: 29510
Epoch: [4] [ 170/1251] eta: 0:09:56 lr: 0.000010 loss: 2.4113 (2.6613) time: 0.5289 data: 0.0002 max mem: 29510
Epoch: [4] [ 180/1251] eta: 0:09:49 lr: 0.000010 loss: 2.6744 (2.6624) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [4] [ 190/1251] eta: 0:09:42 lr: 0.000010 loss: 2.7518 (2.6542) time: 0.5257 data: 0.0002 max mem: 29510
Epoch: [4] [ 200/1251] eta: 0:09:35 lr: 0.000010 loss: 2.7970 (2.6612) time: 0.5280 data: 0.0002 max mem: 29510
Epoch: [4] [ 210/1251] eta: 0:09:28 lr: 0.000010 loss: 2.8427 (2.6656) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [4] [ 220/1251] eta: 0:09:22 lr: 0.000010 loss: 2.8427 (2.6634) time: 0.5225 data: 0.0002 max mem: 29510
Epoch: [4] [ 230/1251] eta: 0:09:16 lr: 0.000010 loss: 2.6532 (2.6598) time: 0.5263 data: 0.0002 max mem: 29510
Epoch: [4] [ 240/1251] eta: 0:09:09 lr: 0.000010 loss: 2.5037 (2.6566) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [4] [ 250/1251] eta: 0:09:03 lr: 0.000010 loss: 2.6423 (2.6570) time: 0.5266 data: 0.0002 max mem: 29510
Epoch: [4] [ 260/1251] eta: 0:08:57 lr: 0.000010 loss: 2.6591 (2.6586) time: 0.5273 data: 0.0002 max mem: 29510
Epoch: [4] [ 270/1251] eta: 0:08:51 lr: 0.000010 loss: 2.7674 (2.6642) time: 0.5204 data: 0.0002 max mem: 29510
Epoch: [4] [ 280/1251] eta: 0:08:45 lr: 0.000010 loss: 2.7674 (2.6617) time: 0.5184 data: 0.0002 max mem: 29510
Epoch: [4] [ 290/1251] eta: 0:08:39 lr: 0.000010 loss: 2.7203 (2.6591) time: 0.5262 data: 0.0002 max mem: 29510
Epoch: [4] [ 300/1251] eta: 0:08:33 lr: 0.000010 loss: 2.8343 (2.6629) time: 0.5287 data: 0.0017 max mem: 29510
Epoch: [4] [ 310/1251] eta: 0:08:27 lr: 0.000010 loss: 2.7671 (2.6552) time: 0.5280 data: 0.0017 max mem: 29510
Epoch: [4] [ 320/1251] eta: 0:08:22 lr: 0.000010 loss: 2.7732 (2.6604) time: 0.5252 data: 0.0003 max mem: 29510
Epoch: [4] [ 330/1251] eta: 0:08:16 lr: 0.000010 loss: 2.8242 (2.6665) time: 0.5255 data: 0.0003 max mem: 29510
Epoch: [4] [ 340/1251] eta: 0:08:10 lr: 0.000010 loss: 2.7911 (2.6626) time: 0.5221 data: 0.0002 max mem: 29510
Epoch: [4] [ 350/1251] eta: 0:08:04 lr: 0.000010 loss: 2.7843 (2.6661) time: 0.5186 data: 0.0002 max mem: 29510
Epoch: [4] [ 360/1251] eta: 0:07:58 lr: 0.000010 loss: 2.8565 (2.6685) time: 0.5207 data: 0.0002 max mem: 29510
Epoch: [4] [ 370/1251] eta: 0:07:53 lr: 0.000010 loss: 2.7831 (2.6664) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [4] [ 380/1251] eta: 0:07:47 lr: 0.000010 loss: 2.7015 (2.6631) time: 0.5250 data: 0.0002 max mem: 29510
Epoch: [4] [ 390/1251] eta: 0:07:41 lr: 0.000010 loss: 2.7015 (2.6646) time: 0.5266 data: 0.0002 max mem: 29510
Epoch: [4] [ 400/1251] eta: 0:07:36 lr: 0.000010 loss: 2.7603 (2.6674) time: 0.5274 data: 0.0002 max mem: 29510
Epoch: [4] [ 410/1251] eta: 0:07:30 lr: 0.000010 loss: 2.8171 (2.6658) time: 0.5246 data: 0.0002 max mem: 29510
Epoch: [4] [ 420/1251] eta: 0:07:25 lr: 0.000010 loss: 2.8171 (2.6692) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [4] [ 430/1251] eta: 0:07:19 lr: 0.000010 loss: 2.8832 (2.6684) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [4] [ 440/1251] eta: 0:07:13 lr: 0.000010 loss: 2.7333 (2.6682) time: 0.5248 data: 0.0002 max mem: 29510
Epoch: [4] [ 450/1251] eta: 0:07:08 lr: 0.000010 loss: 2.7788 (2.6691) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [4] [ 460/1251] eta: 0:07:02 lr: 0.000010 loss: 2.6681 (2.6643) time: 0.5208 data: 0.0002 max mem: 29510
Epoch: [4] [ 470/1251] eta: 0:06:57 lr: 0.000010 loss: 2.5071 (2.6613) time: 0.5196 data: 0.0002 max mem: 29510
Epoch: [4] [ 480/1251] eta: 0:06:51 lr: 0.000010 loss: 2.4923 (2.6572) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [4] [ 490/1251] eta: 0:06:46 lr: 0.000010 loss: 2.8536 (2.6607) time: 0.5291 data: 0.0002 max mem: 29510
Epoch: [4] [ 500/1251] eta: 0:06:40 lr: 0.000010 loss: 2.8109 (2.6608) time: 0.5329 data: 0.0002 max mem: 29510
Epoch: [4] [ 510/1251] eta: 0:06:35 lr: 0.000010 loss: 2.6752 (2.6616) time: 0.5278 data: 0.0002 max mem: 29510
Epoch: [4] [ 520/1251] eta: 0:06:30 lr: 0.000010 loss: 2.6179 (2.6581) time: 0.5251 data: 0.0002 max mem: 29510
Epoch: [4] [ 530/1251] eta: 0:06:24 lr: 0.000010 loss: 2.6595 (2.6624) time: 0.5273 data: 0.0002 max mem: 29510
Epoch: [4] [ 540/1251] eta: 0:06:19 lr: 0.000010 loss: 2.6582 (2.6561) time: 0.5276 data: 0.0002 max mem: 29510
Epoch: [4] [ 550/1251] eta: 0:06:13 lr: 0.000010 loss: 2.7579 (2.6597) time: 0.5270 data: 0.0002 max mem: 29510
Epoch: [4] [ 560/1251] eta: 0:06:08 lr: 0.000010 loss: 2.7975 (2.6585) time: 0.5248 data: 0.0003 max mem: 29510
Epoch: [4] [ 570/1251] eta: 0:06:02 lr: 0.000010 loss: 2.5910 (2.6566) time: 0.5250 data: 0.0003 max mem: 29510
Epoch: [4] [ 580/1251] eta: 0:05:57 lr: 0.000010 loss: 2.6163 (2.6560) time: 0.5285 data: 0.0002 max mem: 29510
Epoch: [4] [ 590/1251] eta: 0:05:52 lr: 0.000010 loss: 2.8231 (2.6588) time: 0.5260 data: 0.0002 max mem: 29510
Epoch: [4] [ 600/1251] eta: 0:05:46 lr: 0.000010 loss: 2.7542 (2.6575) time: 0.5203 data: 0.0003 max mem: 29510
Epoch: [4] [ 610/1251] eta: 0:05:41 lr: 0.000010 loss: 2.5722 (2.6582) time: 0.5170 data: 0.0003 max mem: 29510
Epoch: [4] [ 620/1251] eta: 0:05:35 lr: 0.000010 loss: 2.5722 (2.6553) time: 0.5223 data: 0.0003 max mem: 29510
Epoch: [4] [ 630/1251] eta: 0:05:30 lr: 0.000010 loss: 2.6039 (2.6540) time: 0.5303 data: 0.0003 max mem: 29510
Epoch: [4] [ 640/1251] eta: 0:05:25 lr: 0.000010 loss: 2.5207 (2.6496) time: 0.5289 data: 0.0002 max mem: 29510
Epoch: [4] [ 650/1251] eta: 0:05:19 lr: 0.000010 loss: 2.5207 (2.6514) time: 0.5290 data: 0.0002 max mem: 29510
Epoch: [4] [ 660/1251] eta: 0:05:14 lr: 0.000010 loss: 2.8743 (2.6519) time: 0.5269 data: 0.0002 max mem: 29510
Epoch: [4] [ 670/1251] eta: 0:05:09 lr: 0.000010 loss: 2.6562 (2.6530) time: 0.5271 data: 0.0002 max mem: 29510
Epoch: [4] [ 680/1251] eta: 0:05:03 lr: 0.000010 loss: 2.7529 (2.6542) time: 0.5243 data: 0.0002 max mem: 29510
Epoch: [4] [ 690/1251] eta: 0:04:58 lr: 0.000010 loss: 2.4776 (2.6507) time: 0.5179 data: 0.0002 max mem: 29510
Epoch: [4] [ 700/1251] eta: 0:04:52 lr: 0.000010 loss: 2.5679 (2.6516) time: 0.5208 data: 0.0002 max mem: 29510
Epoch: [4] [ 710/1251] eta: 0:04:47 lr: 0.000010 loss: 2.8348 (2.6536) time: 0.5238 data: 0.0003 max mem: 29510
Epoch: [4] [ 720/1251] eta: 0:04:42 lr: 0.000010 loss: 2.8236 (2.6536) time: 0.5272 data: 0.0002 max mem: 29510
Epoch: [4] [ 730/1251] eta: 0:04:36 lr: 0.000010 loss: 2.7889 (2.6537) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [4] [ 740/1251] eta: 0:04:31 lr: 0.000010 loss: 2.7889 (2.6549) time: 0.5264 data: 0.0002 max mem: 29510
Epoch: [4] [ 750/1251] eta: 0:04:26 lr: 0.000010 loss: 2.6445 (2.6533) time: 0.5280 data: 0.0002 max mem: 29510
Epoch: [4] [ 760/1251] eta: 0:04:20 lr: 0.000010 loss: 2.7384 (2.6560) time: 0.5222 data: 0.0002 max mem: 29510
Epoch: [4] [ 770/1251] eta: 0:04:15 lr: 0.000010 loss: 2.8383 (2.6562) time: 0.5241 data: 0.0003 max mem: 29510
Epoch: [4] [ 780/1251] eta: 0:04:09 lr: 0.000010 loss: 2.8186 (2.6569) time: 0.5240 data: 0.0002 max mem: 29510
Epoch: [4] [ 790/1251] eta: 0:04:04 lr: 0.000010 loss: 2.7956 (2.6550) time: 0.5190 data: 0.0002 max mem: 29510
Epoch: [4] [ 800/1251] eta: 0:03:59 lr: 0.000010 loss: 2.5153 (2.6544) time: 0.5251 data: 0.0002 max mem: 29510
Epoch: [4] [ 810/1251] eta: 0:03:53 lr: 0.000010 loss: 2.8216 (2.6566) time: 0.5234 data: 0.0002 max mem: 29510
Epoch: [4] [ 820/1251] eta: 0:03:48 lr: 0.000010 loss: 2.8757 (2.6559) time: 0.5199 data: 0.0003 max mem: 29510
Epoch: [4] [ 830/1251] eta: 0:03:43 lr: 0.000010 loss: 2.5613 (2.6542) time: 0.5219 data: 0.0002 max mem: 29510
Epoch: [4] [ 840/1251] eta: 0:03:37 lr: 0.000010 loss: 2.7839 (2.6558) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [4] [ 850/1251] eta: 0:03:32 lr: 0.000010 loss: 2.8007 (2.6585) time: 0.5248 data: 0.0002 max mem: 29510
Epoch: [4] [ 860/1251] eta: 0:03:27 lr: 0.000010 loss: 2.8630 (2.6608) time: 0.5220 data: 0.0002 max mem: 29510
Epoch: [4] [ 870/1251] eta: 0:03:21 lr: 0.000010 loss: 2.7852 (2.6598) time: 0.5242 data: 0.0002 max mem: 29510
Epoch: [4] [ 880/1251] eta: 0:03:16 lr: 0.000010 loss: 2.7020 (2.6605) time: 0.5251 data: 0.0003 max mem: 29510
Epoch: [4] [ 890/1251] eta: 0:03:11 lr: 0.000010 loss: 2.7237 (2.6579) time: 0.5246 data: 0.0002 max mem: 29510
Epoch: [4] [ 900/1251] eta: 0:03:05 lr: 0.000010 loss: 2.8096 (2.6574) time: 0.5265 data: 0.0002 max mem: 29510
Epoch: [4] [ 910/1251] eta: 0:03:00 lr: 0.000010 loss: 2.8470 (2.6593) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [4] [ 920/1251] eta: 0:02:55 lr: 0.000010 loss: 2.8577 (2.6600) time: 0.5258 data: 0.0002 max mem: 29510
Epoch: [4] [ 930/1251] eta: 0:02:50 lr: 0.000010 loss: 2.8252 (2.6613) time: 0.5245 data: 0.0002 max mem: 29510
Epoch: [4] [ 940/1251] eta: 0:02:44 lr: 0.000010 loss: 2.8252 (2.6624) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [4] [ 950/1251] eta: 0:02:39 lr: 0.000010 loss: 2.7138 (2.6623) time: 0.5176 data: 0.0002 max mem: 29510
Epoch: [4] [ 960/1251] eta: 0:02:34 lr: 0.000010 loss: 2.6995 (2.6623) time: 0.5171 data: 0.0003 max mem: 29510
Epoch: [4] [ 970/1251] eta: 0:02:28 lr: 0.000010 loss: 2.7433 (2.6637) time: 0.5268 data: 0.0002 max mem: 29510
Epoch: [4] [ 980/1251] eta: 0:02:23 lr: 0.000010 loss: 2.7544 (2.6634) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [4] [ 990/1251] eta: 0:02:18 lr: 0.000010 loss: 2.7413 (2.6638) time: 0.5216 data: 0.0002 max mem: 29510
Epoch: [4] [1000/1251] eta: 0:02:12 lr: 0.000010 loss: 2.8740 (2.6644) time: 0.5252 data: 0.0002 max mem: 29510
Epoch: [4] [1010/1251] eta: 0:02:07 lr: 0.000010 loss: 2.7859 (2.6649) time: 0.5197 data: 0.0002 max mem: 29510
Epoch: [4] [1020/1251] eta: 0:02:02 lr: 0.000010 loss: 2.5595 (2.6625) time: 0.5183 data: 0.0002 max mem: 29510
Epoch: [4] [1030/1251] eta: 0:01:56 lr: 0.000010 loss: 2.2989 (2.6573) time: 0.5195 data: 0.0002 max mem: 29510
Epoch: [4] [1040/1251] eta: 0:01:51 lr: 0.000010 loss: 2.4963 (2.6573) time: 0.5183 data: 0.0002 max mem: 29510
Epoch: [4] [1050/1251] eta: 0:01:46 lr: 0.000010 loss: 2.6494 (2.6568) time: 0.5203 data: 0.0002 max mem: 29510
Epoch: [4] [1060/1251] eta: 0:01:40 lr: 0.000010 loss: 2.4917 (2.6551) time: 0.5243 data: 0.0002 max mem: 29510
Epoch: [4] [1070/1251] eta: 0:01:35 lr: 0.000010 loss: 2.5466 (2.6542) time: 0.5266 data: 0.0003 max mem: 29510
Epoch: [4] [1080/1251] eta: 0:01:30 lr: 0.000010 loss: 2.4850 (2.6532) time: 0.5234 data: 0.0002 max mem: 29510
Epoch: [4] [1090/1251] eta: 0:01:25 lr: 0.000010 loss: 2.5440 (2.6533) time: 0.5202 data: 0.0002 max mem: 29510
Epoch: [4] [1100/1251] eta: 0:01:19 lr: 0.000010 loss: 2.8690 (2.6549) time: 0.5192 data: 0.0002 max mem: 29510
Epoch: [4] [1110/1251] eta: 0:01:14 lr: 0.000010 loss: 2.8161 (2.6550) time: 0.5261 data: 0.0002 max mem: 29510
Epoch: [4] [1120/1251] eta: 0:01:09 lr: 0.000010 loss: 2.6813 (2.6554) time: 0.5269 data: 0.0002 max mem: 29510
Epoch: [4] [1130/1251] eta: 0:01:03 lr: 0.000010 loss: 2.7972 (2.6557) time: 0.5210 data: 0.0002 max mem: 29510
Epoch: [4] [1140/1251] eta: 0:00:58 lr: 0.000010 loss: 2.9039 (2.6560) time: 0.5208 data: 0.0003 max mem: 29510
Epoch: [4] [1150/1251] eta: 0:00:53 lr: 0.000010 loss: 2.9231 (2.6574) time: 0.5214 data: 0.0002 max mem: 29510
Epoch: [4] [1160/1251] eta: 0:00:48 lr: 0.000010 loss: 2.8721 (2.6568) time: 0.5191 data: 0.0002 max mem: 29510
Epoch: [4] [1170/1251] eta: 0:00:42 lr: 0.000010 loss: 2.8097 (2.6583) time: 0.5194 data: 0.0002 max mem: 29510
Epoch: [4] [1180/1251] eta: 0:00:37 lr: 0.000010 loss: 2.8799 (2.6591) time: 0.5227 data: 0.0002 max mem: 29510
Epoch: [4] [1190/1251] eta: 0:00:32 lr: 0.000010 loss: 2.8799 (2.6597) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [4] [1200/1251] eta: 0:00:26 lr: 0.000010 loss: 2.8538 (2.6612) time: 0.5254 data: 0.0002 max mem: 29510
Epoch: [4] [1210/1251] eta: 0:00:21 lr: 0.000010 loss: 2.8019 (2.6599) time: 0.5268 data: 0.0003 max mem: 29510
Epoch: [4] [1220/1251] eta: 0:00:16 lr: 0.000010 loss: 2.5145 (2.6591) time: 0.5252 data: 0.0003 max mem: 29510
Epoch: [4] [1230/1251] eta: 0:00:11 lr: 0.000010 loss: 2.8209 (2.6607) time: 0.5226 data: 0.0003 max mem: 29510
Epoch: [4] [1240/1251] eta: 0:00:05 lr: 0.000010 loss: 2.8579 (2.6605) time: 0.5089 data: 0.0009 max mem: 29510
Epoch: [4] [1250/1251] eta: 0:00:00 lr: 0.000010 loss: 2.7235 (2.6595) time: 0.4935 data: 0.0008 max mem: 29510
Epoch: [4] Total time: 0:11:00 (0.5278 s / it)
Averaged stats: lr: 0.000010 loss: 2.7235 (2.6858)
Test: [ 0/25] eta: 0:03:13 loss: 0.6208 (0.6208) acc1: 90.4000 (90.4000) acc5: 98.8000 (98.8000) time: 7.7392 data: 7.4484 max mem: 29510
Test: [10/25] eta: 0:00:16 loss: 0.7732 (0.7499) acc1: 86.8000 (86.6546) acc5: 98.0000 (98.0000) time: 1.0868 data: 0.7626 max mem: 29510
Test: [20/25] eta: 0:00:03 loss: 0.8701 (0.8708) acc1: 81.2000 (83.2191) acc5: 96.8000 (96.8381) time: 0.3640 data: 0.0471 max mem: 29510
Test: [24/25] eta: 0:00:00 loss: 0.9372 (0.8809) acc1: 80.0000 (82.8800) acc5: 96.8000 (96.8320) time: 0.3574 data: 0.0470 max mem: 29510
Test: Total time: 0:00:16 (0.6561 s / it)
* Acc@1 83.422 Acc@5 96.788 loss 0.875
Accuracy of the network on the 50000 test images: 83.4%
Max accuracy: 83.42%
Epoch: [5] [ 0/1251] eta: 1:20:44 lr: 0.000010 loss: 3.3573 (3.3573) time: 3.8726 data: 2.9053 max mem: 29510
Epoch: [5] [ 10/1251] eta: 0:17:54 lr: 0.000010 loss: 2.8796 (2.6946) time: 0.8659 data: 0.2846 max mem: 29510
Epoch: [5] [ 20/1251] eta: 0:14:29 lr: 0.000010 loss: 2.4912 (2.5490) time: 0.5479 data: 0.0114 max mem: 29510
Epoch: [5] [ 30/1251] eta: 0:13:12 lr: 0.000010 loss: 2.4912 (2.5855) time: 0.5291 data: 0.0002 max mem: 29510
Epoch: [5] [ 40/1251] eta: 0:12:24 lr: 0.000010 loss: 2.7344 (2.6217) time: 0.5195 data: 0.0002 max mem: 29510
Epoch: [5] [ 50/1251] eta: 0:11:58 lr: 0.000010 loss: 2.8238 (2.6108) time: 0.5201 data: 0.0002 max mem: 29510
Epoch: [5] [ 60/1251] eta: 0:11:38 lr: 0.000010 loss: 2.8366 (2.6600) time: 0.5266 data: 0.0002 max mem: 29510
Epoch: [5] [ 70/1251] eta: 0:11:22 lr: 0.000010 loss: 2.8373 (2.6559) time: 0.5267 data: 0.0002 max mem: 29510
Epoch: [5] [ 80/1251] eta: 0:11:09 lr: 0.000010 loss: 2.9241 (2.6766) time: 0.5295 data: 0.0002 max mem: 29510
Epoch: [5] [ 90/1251] eta: 0:10:59 lr: 0.000010 loss: 2.9020 (2.6738) time: 0.5314 data: 0.0002 max mem: 29510
Epoch: [5] [ 100/1251] eta: 0:10:48 lr: 0.000010 loss: 2.7789 (2.6689) time: 0.5294 data: 0.0003 max mem: 29510
Epoch: [5] [ 110/1251] eta: 0:10:39 lr: 0.000010 loss: 2.6567 (2.6650) time: 0.5278 data: 0.0003 max mem: 29510
Epoch: [5] [ 120/1251] eta: 0:10:30 lr: 0.000010 loss: 2.6883 (2.6621) time: 0.5260 data: 0.0003 max mem: 29510
Epoch: [5] [ 130/1251] eta: 0:10:22 lr: 0.000010 loss: 2.4570 (2.6385) time: 0.5252 data: 0.0002 max mem: 29510
Epoch: [5] [ 140/1251] eta: 0:10:14 lr: 0.000010 loss: 2.4570 (2.6372) time: 0.5277 data: 0.0002 max mem: 29510
Epoch: [5] [ 150/1251] eta: 0:10:07 lr: 0.000010 loss: 2.7868 (2.6439) time: 0.5284 data: 0.0002 max mem: 29510
Epoch: [5] [ 160/1251] eta: 0:09:59 lr: 0.000010 loss: 2.7868 (2.6417) time: 0.5261 data: 0.0003 max mem: 29510
Epoch: [5] [ 170/1251] eta: 0:09:52 lr: 0.000010 loss: 2.8033 (2.6566) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [5] [ 180/1251] eta: 0:09:45 lr: 0.000010 loss: 2.8364 (2.6564) time: 0.5245 data: 0.0002 max mem: 29510
Epoch: [5] [ 190/1251] eta: 0:09:39 lr: 0.000010 loss: 2.6101 (2.6529) time: 0.5262 data: 0.0002 max mem: 29510
Epoch: [5] [ 200/1251] eta: 0:09:32 lr: 0.000010 loss: 2.8783 (2.6655) time: 0.5265 data: 0.0002 max mem: 29510
Epoch: [5] [ 210/1251] eta: 0:09:26 lr: 0.000010 loss: 2.8380 (2.6654) time: 0.5224 data: 0.0002 max mem: 29510
Epoch: [5] [ 220/1251] eta: 0:09:19 lr: 0.000010 loss: 2.8085 (2.6699) time: 0.5225 data: 0.0002 max mem: 29510
Epoch: [5] [ 230/1251] eta: 0:09:13 lr: 0.000010 loss: 2.8219 (2.6669) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [5] [ 240/1251] eta: 0:09:07 lr: 0.000010 loss: 2.6856 (2.6594) time: 0.5263 data: 0.0002 max mem: 29510
Epoch: [5] [ 250/1251] eta: 0:09:01 lr: 0.000010 loss: 2.6389 (2.6639) time: 0.5257 data: 0.0002 max mem: 29510
Epoch: [5] [ 260/1251] eta: 0:08:55 lr: 0.000010 loss: 2.8987 (2.6681) time: 0.5224 data: 0.0003 max mem: 29510
Epoch: [5] [ 270/1251] eta: 0:08:49 lr: 0.000010 loss: 2.7545 (2.6710) time: 0.5224 data: 0.0003 max mem: 29510
Epoch: [5] [ 280/1251] eta: 0:08:43 lr: 0.000010 loss: 2.8462 (2.6768) time: 0.5231 data: 0.0002 max mem: 29510
Epoch: [5] [ 290/1251] eta: 0:08:37 lr: 0.000010 loss: 2.8462 (2.6745) time: 0.5235 data: 0.0002 max mem: 29510
Epoch: [5] [ 300/1251] eta: 0:08:31 lr: 0.000010 loss: 2.7201 (2.6809) time: 0.5242 data: 0.0002 max mem: 29510
Epoch: [5] [ 310/1251] eta: 0:08:25 lr: 0.000010 loss: 2.7201 (2.6770) time: 0.5268 data: 0.0002 max mem: 29510
Epoch: [5] [ 320/1251] eta: 0:08:20 lr: 0.000010 loss: 2.5924 (2.6718) time: 0.5270 data: 0.0002 max mem: 29510
Epoch: [5] [ 330/1251] eta: 0:08:14 lr: 0.000010 loss: 2.6852 (2.6719) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [5] [ 340/1251] eta: 0:08:08 lr: 0.000010 loss: 2.6852 (2.6696) time: 0.5221 data: 0.0002 max mem: 29510
Epoch: [5] [ 350/1251] eta: 0:08:02 lr: 0.000010 loss: 2.6393 (2.6724) time: 0.5209 data: 0.0002 max mem: 29510
Epoch: [5] [ 360/1251] eta: 0:07:57 lr: 0.000010 loss: 2.9369 (2.6791) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [5] [ 370/1251] eta: 0:07:51 lr: 0.000010 loss: 2.9369 (2.6838) time: 0.5210 data: 0.0002 max mem: 29510
Epoch: [5] [ 380/1251] eta: 0:07:45 lr: 0.000010 loss: 2.8228 (2.6862) time: 0.5209 data: 0.0002 max mem: 29510
Epoch: [5] [ 390/1251] eta: 0:07:40 lr: 0.000010 loss: 2.6902 (2.6872) time: 0.5201 data: 0.0003 max mem: 29510
Epoch: [5] [ 400/1251] eta: 0:07:34 lr: 0.000010 loss: 2.6839 (2.6869) time: 0.5202 data: 0.0002 max mem: 29510
Epoch: [5] [ 410/1251] eta: 0:07:29 lr: 0.000010 loss: 2.7119 (2.6850) time: 0.5289 data: 0.0002 max mem: 29510
Epoch: [5] [ 420/1251] eta: 0:07:23 lr: 0.000010 loss: 2.7032 (2.6839) time: 0.5301 data: 0.0002 max mem: 29510
Epoch: [5] [ 430/1251] eta: 0:07:18 lr: 0.000010 loss: 2.4918 (2.6779) time: 0.5256 data: 0.0002 max mem: 29510
Epoch: [5] [ 440/1251] eta: 0:07:12 lr: 0.000010 loss: 2.4918 (2.6762) time: 0.5239 data: 0.0002 max mem: 29510
Epoch: [5] [ 450/1251] eta: 0:07:07 lr: 0.000010 loss: 2.4745 (2.6724) time: 0.5254 data: 0.0003 max mem: 29510
Epoch: [5] [ 460/1251] eta: 0:07:01 lr: 0.000010 loss: 2.4180 (2.6659) time: 0.5262 data: 0.0002 max mem: 29510
Epoch: [5] [ 470/1251] eta: 0:06:56 lr: 0.000010 loss: 2.3769 (2.6616) time: 0.5260 data: 0.0002 max mem: 29510
Epoch: [5] [ 480/1251] eta: 0:06:50 lr: 0.000010 loss: 2.6005 (2.6612) time: 0.5260 data: 0.0002 max mem: 29510
Epoch: [5] [ 490/1251] eta: 0:06:45 lr: 0.000010 loss: 2.8191 (2.6629) time: 0.5248 data: 0.0002 max mem: 29510
Epoch: [5] [ 500/1251] eta: 0:06:39 lr: 0.000010 loss: 2.8296 (2.6637) time: 0.5210 data: 0.0002 max mem: 29510
Epoch: [5] [ 510/1251] eta: 0:06:34 lr: 0.000010 loss: 2.7130 (2.6636) time: 0.5193 data: 0.0002 max mem: 29510
Epoch: [5] [ 520/1251] eta: 0:06:28 lr: 0.000010 loss: 2.7130 (2.6647) time: 0.5200 data: 0.0003 max mem: 29510
Epoch: [5] [ 530/1251] eta: 0:06:23 lr: 0.000010 loss: 2.8479 (2.6635) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [5] [ 540/1251] eta: 0:06:18 lr: 0.000010 loss: 2.6306 (2.6614) time: 0.5274 data: 0.0002 max mem: 29510
Epoch: [5] [ 550/1251] eta: 0:06:12 lr: 0.000010 loss: 2.6306 (2.6611) time: 0.5301 data: 0.0002 max mem: 29510
Epoch: [5] [ 560/1251] eta: 0:06:07 lr: 0.000010 loss: 2.6896 (2.6583) time: 0.5300 data: 0.0002 max mem: 29510
Epoch: [5] [ 570/1251] eta: 0:06:02 lr: 0.000010 loss: 2.8765 (2.6597) time: 0.5270 data: 0.0002 max mem: 29510
Epoch: [5] [ 580/1251] eta: 0:05:56 lr: 0.000010 loss: 2.8686 (2.6611) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [5] [ 590/1251] eta: 0:05:51 lr: 0.000010 loss: 2.8402 (2.6635) time: 0.5255 data: 0.0002 max mem: 29510
Epoch: [5] [ 600/1251] eta: 0:05:45 lr: 0.000010 loss: 2.8402 (2.6648) time: 0.5277 data: 0.0002 max mem: 29510
Epoch: [5] [ 610/1251] eta: 0:05:40 lr: 0.000010 loss: 2.7775 (2.6644) time: 0.5251 data: 0.0003 max mem: 29510
Epoch: [5] [ 620/1251] eta: 0:05:35 lr: 0.000010 loss: 2.7902 (2.6663) time: 0.5205 data: 0.0003 max mem: 29510
Epoch: [5] [ 630/1251] eta: 0:05:29 lr: 0.000010 loss: 2.8226 (2.6661) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [5] [ 640/1251] eta: 0:05:24 lr: 0.000010 loss: 2.7266 (2.6655) time: 0.5225 data: 0.0002 max mem: 29510
Epoch: [5] [ 650/1251] eta: 0:05:18 lr: 0.000010 loss: 2.8393 (2.6676) time: 0.5244 data: 0.0002 max mem: 29510
Epoch: [5] [ 660/1251] eta: 0:05:13 lr: 0.000010 loss: 2.9421 (2.6682) time: 0.5244 data: 0.0002 max mem: 29510
Epoch: [5] [ 670/1251] eta: 0:05:08 lr: 0.000010 loss: 2.7119 (2.6697) time: 0.5224 data: 0.0002 max mem: 29510
Epoch: [5] [ 680/1251] eta: 0:05:02 lr: 0.000010 loss: 2.7090 (2.6694) time: 0.5262 data: 0.0002 max mem: 29510
Epoch: [5] [ 690/1251] eta: 0:04:57 lr: 0.000010 loss: 2.7090 (2.6694) time: 0.5248 data: 0.0002 max mem: 29510
Epoch: [5] [ 700/1251] eta: 0:04:52 lr: 0.000010 loss: 2.6314 (2.6657) time: 0.5220 data: 0.0003 max mem: 29510
Epoch: [5] [ 710/1251] eta: 0:04:46 lr: 0.000010 loss: 2.2920 (2.6637) time: 0.5242 data: 0.0003 max mem: 29510
Epoch: [5] [ 720/1251] eta: 0:04:41 lr: 0.000010 loss: 2.5913 (2.6630) time: 0.5261 data: 0.0002 max mem: 29510
Epoch: [5] [ 730/1251] eta: 0:04:36 lr: 0.000010 loss: 2.7125 (2.6645) time: 0.5246 data: 0.0002 max mem: 29510
Epoch: [5] [ 740/1251] eta: 0:04:30 lr: 0.000010 loss: 2.7435 (2.6651) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [5] [ 750/1251] eta: 0:04:25 lr: 0.000010 loss: 2.7593 (2.6667) time: 0.5225 data: 0.0002 max mem: 29510
Epoch: [5] [ 760/1251] eta: 0:04:20 lr: 0.000010 loss: 2.7640 (2.6666) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [5] [ 770/1251] eta: 0:04:14 lr: 0.000010 loss: 2.8184 (2.6691) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [5] [ 780/1251] eta: 0:04:09 lr: 0.000010 loss: 2.7910 (2.6704) time: 0.5184 data: 0.0002 max mem: 29510
Epoch: [5] [ 790/1251] eta: 0:04:04 lr: 0.000010 loss: 2.6368 (2.6680) time: 0.5192 data: 0.0002 max mem: 29510
Epoch: [5] [ 800/1251] eta: 0:03:58 lr: 0.000010 loss: 2.6570 (2.6700) time: 0.5209 data: 0.0002 max mem: 29510
Epoch: [5] [ 810/1251] eta: 0:03:53 lr: 0.000010 loss: 2.9020 (2.6690) time: 0.5207 data: 0.0002 max mem: 29510
Epoch: [5] [ 820/1251] eta: 0:03:48 lr: 0.000010 loss: 2.6519 (2.6693) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [5] [ 830/1251] eta: 0:03:42 lr: 0.000010 loss: 2.7468 (2.6697) time: 0.5185 data: 0.0002 max mem: 29510
Epoch: [5] [ 840/1251] eta: 0:03:37 lr: 0.000010 loss: 2.7468 (2.6686) time: 0.5223 data: 0.0002 max mem: 29510
Epoch: [5] [ 850/1251] eta: 0:03:32 lr: 0.000010 loss: 2.7474 (2.6692) time: 0.5269 data: 0.0002 max mem: 29510
Epoch: [5] [ 860/1251] eta: 0:03:26 lr: 0.000010 loss: 2.7383 (2.6683) time: 0.5231 data: 0.0002 max mem: 29510
Epoch: [5] [ 870/1251] eta: 0:03:21 lr: 0.000010 loss: 2.5355 (2.6662) time: 0.5189 data: 0.0002 max mem: 29510
Epoch: [5] [ 880/1251] eta: 0:03:16 lr: 0.000010 loss: 2.7205 (2.6666) time: 0.5193 data: 0.0002 max mem: 29510
Epoch: [5] [ 890/1251] eta: 0:03:10 lr: 0.000010 loss: 2.7176 (2.6654) time: 0.5212 data: 0.0002 max mem: 29510
Epoch: [5] [ 900/1251] eta: 0:03:05 lr: 0.000010 loss: 2.4462 (2.6636) time: 0.5255 data: 0.0003 max mem: 29510
Epoch: [5] [ 910/1251] eta: 0:03:00 lr: 0.000010 loss: 2.7167 (2.6641) time: 0.5243 data: 0.0002 max mem: 29510
Epoch: [5] [ 920/1251] eta: 0:02:54 lr: 0.000010 loss: 2.9785 (2.6663) time: 0.5237 data: 0.0002 max mem: 29510
Epoch: [5] [ 930/1251] eta: 0:02:49 lr: 0.000010 loss: 2.4333 (2.6629) time: 0.5257 data: 0.0002 max mem: 29510
Epoch: [5] [ 940/1251] eta: 0:02:44 lr: 0.000010 loss: 2.3318 (2.6605) time: 0.5209 data: 0.0002 max mem: 29510
Epoch: [5] [ 950/1251] eta: 0:02:38 lr: 0.000010 loss: 2.5622 (2.6588) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [5] [ 960/1251] eta: 0:02:33 lr: 0.000010 loss: 2.6982 (2.6587) time: 0.5195 data: 0.0003 max mem: 29510
Epoch: [5] [ 970/1251] eta: 0:02:28 lr: 0.000010 loss: 2.8192 (2.6602) time: 0.5164 data: 0.0002 max mem: 29510
Epoch: [5] [ 980/1251] eta: 0:02:23 lr: 0.000010 loss: 2.8553 (2.6592) time: 0.5179 data: 0.0002 max mem: 29510
Epoch: [5] [ 990/1251] eta: 0:02:17 lr: 0.000010 loss: 2.7115 (2.6601) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [5] [1000/1251] eta: 0:02:12 lr: 0.000010 loss: 2.8310 (2.6615) time: 0.5231 data: 0.0002 max mem: 29510
Epoch: [5] [1010/1251] eta: 0:02:07 lr: 0.000010 loss: 2.9405 (2.6616) time: 0.5216 data: 0.0002 max mem: 29510
Epoch: [5] [1020/1251] eta: 0:02:01 lr: 0.000010 loss: 2.8825 (2.6626) time: 0.5230 data: 0.0003 max mem: 29510
Epoch: [5] [1030/1251] eta: 0:01:56 lr: 0.000010 loss: 2.7414 (2.6620) time: 0.5273 data: 0.0002 max mem: 29510
Epoch: [5] [1040/1251] eta: 0:01:51 lr: 0.000010 loss: 2.7414 (2.6637) time: 0.5261 data: 0.0003 max mem: 29510
Epoch: [5] [1050/1251] eta: 0:01:46 lr: 0.000010 loss: 2.5936 (2.6611) time: 0.5184 data: 0.0002 max mem: 29510
Epoch: [5] [1060/1251] eta: 0:01:40 lr: 0.000010 loss: 2.5881 (2.6609) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [5] [1070/1251] eta: 0:01:35 lr: 0.000010 loss: 2.6754 (2.6616) time: 0.5225 data: 0.0002 max mem: 29510
Epoch: [5] [1080/1251] eta: 0:01:30 lr: 0.000010 loss: 2.6754 (2.6601) time: 0.5190 data: 0.0002 max mem: 29510
Epoch: [5] [1090/1251] eta: 0:01:24 lr: 0.000010 loss: 2.6361 (2.6590) time: 0.5230 data: 0.0002 max mem: 29510
Epoch: [5] [1100/1251] eta: 0:01:19 lr: 0.000010 loss: 2.6219 (2.6584) time: 0.5193 data: 0.0002 max mem: 29510
Epoch: [5] [1110/1251] eta: 0:01:14 lr: 0.000010 loss: 2.6219 (2.6586) time: 0.5198 data: 0.0002 max mem: 29510
Epoch: [5] [1120/1251] eta: 0:01:09 lr: 0.000010 loss: 2.8192 (2.6591) time: 0.5221 data: 0.0002 max mem: 29510
Epoch: [5] [1130/1251] eta: 0:01:03 lr: 0.000010 loss: 2.8192 (2.6607) time: 0.5185 data: 0.0002 max mem: 29510
Epoch: [5] [1140/1251] eta: 0:00:58 lr: 0.000010 loss: 2.5499 (2.6593) time: 0.5185 data: 0.0003 max mem: 29510
Epoch: [5] [1150/1251] eta: 0:00:53 lr: 0.000010 loss: 2.6992 (2.6602) time: 0.5198 data: 0.0002 max mem: 29510
Epoch: [5] [1160/1251] eta: 0:00:47 lr: 0.000010 loss: 2.8731 (2.6609) time: 0.5165 data: 0.0002 max mem: 29510
Epoch: [5] [1170/1251] eta: 0:00:42 lr: 0.000010 loss: 2.8602 (2.6610) time: 0.5172 data: 0.0002 max mem: 29510
Epoch: [5] [1180/1251] eta: 0:00:37 lr: 0.000010 loss: 2.7664 (2.6600) time: 0.5219 data: 0.0002 max mem: 29510
Epoch: [5] [1190/1251] eta: 0:00:32 lr: 0.000010 loss: 2.8148 (2.6613) time: 0.5210 data: 0.0002 max mem: 29510
Epoch: [5] [1200/1251] eta: 0:00:26 lr: 0.000010 loss: 2.9089 (2.6635) time: 0.5188 data: 0.0002 max mem: 29510
Epoch: [5] [1210/1251] eta: 0:00:21 lr: 0.000010 loss: 2.9089 (2.6636) time: 0.5204 data: 0.0002 max mem: 29510
Epoch: [5] [1220/1251] eta: 0:00:16 lr: 0.000010 loss: 2.6270 (2.6623) time: 0.5193 data: 0.0002 max mem: 29510
Epoch: [5] [1230/1251] eta: 0:00:11 lr: 0.000010 loss: 2.7483 (2.6635) time: 0.5200 data: 0.0002 max mem: 29510
Epoch: [5] [1240/1251] eta: 0:00:05 lr: 0.000010 loss: 2.7861 (2.6637) time: 0.5097 data: 0.0009 max mem: 29510
Epoch: [5] [1250/1251] eta: 0:00:00 lr: 0.000010 loss: 2.6820 (2.6633) time: 0.4941 data: 0.0009 max mem: 29510
Epoch: [5] Total time: 0:10:58 (0.5265 s / it)
Averaged stats: lr: 0.000010 loss: 2.6820 (2.6803)
Test: [ 0/25] eta: 0:03:31 loss: 0.6653 (0.6653) acc1: 90.8000 (90.8000) acc5: 98.4000 (98.4000) time: 8.4721 data: 8.0803 max mem: 29510
Test: [10/25] eta: 0:00:16 loss: 0.8295 (0.8095) acc1: 86.4000 (86.4000) acc5: 98.0000 (97.9273) time: 1.1002 data: 0.7349 max mem: 29510
Test: [20/25] eta: 0:00:03 loss: 0.9371 (0.9330) acc1: 80.8000 (83.2381) acc5: 96.8000 (96.7429) time: 0.3344 data: 0.0003 max mem: 29510
Test: [24/25] eta: 0:00:00 loss: 1.0046 (0.9432) acc1: 80.4000 (82.8000) acc5: 96.8000 (96.7840) time: 0.3119 data: 0.0002 max mem: 29510
Test: Total time: 0:00:16 (0.6653 s / it)
* Acc@1 83.386 Acc@5 96.744 loss 0.938
Accuracy of the network on the 50000 test images: 83.4%
Max accuracy: 83.42%
Epoch: [6] [ 0/1251] eta: 1:36:59 lr: 0.000010 loss: 2.8362 (2.8362) time: 4.6523 data: 3.3507 max mem: 29510
Epoch: [6] [ 10/1251] eta: 0:18:49 lr: 0.000010 loss: 2.8362 (2.6857) time: 0.9099 data: 0.3048 max mem: 29510
Epoch: [6] [ 20/1251] eta: 0:14:54 lr: 0.000010 loss: 2.8650 (2.6963) time: 0.5305 data: 0.0003 max mem: 29510
Epoch: [6] [ 30/1251] eta: 0:13:26 lr: 0.000010 loss: 2.7638 (2.6328) time: 0.5236 data: 0.0003 max mem: 29510
Epoch: [6] [ 40/1251] eta: 0:12:39 lr: 0.000010 loss: 2.6119 (2.6422) time: 0.5229 data: 0.0003 max mem: 29510
Epoch: [6] [ 50/1251] eta: 0:12:10 lr: 0.000010 loss: 2.4191 (2.5946) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [6] [ 60/1251] eta: 0:11:48 lr: 0.000010 loss: 2.4191 (2.6082) time: 0.5294 data: 0.0002 max mem: 29510
Epoch: [6] [ 70/1251] eta: 0:11:32 lr: 0.000010 loss: 2.9450 (2.6502) time: 0.5292 data: 0.0002 max mem: 29510
Epoch: [6] [ 80/1251] eta: 0:11:18 lr: 0.000010 loss: 2.8678 (2.6524) time: 0.5302 data: 0.0002 max mem: 29510
Epoch: [6] [ 90/1251] eta: 0:11:07 lr: 0.000010 loss: 2.7428 (2.6669) time: 0.5339 data: 0.0002 max mem: 29510
Epoch: [6] [ 100/1251] eta: 0:10:56 lr: 0.000010 loss: 2.7428 (2.6717) time: 0.5328 data: 0.0002 max mem: 29510
Epoch: [6] [ 110/1251] eta: 0:10:45 lr: 0.000010 loss: 2.7360 (2.6627) time: 0.5273 data: 0.0002 max mem: 29510
Epoch: [6] [ 120/1251] eta: 0:10:35 lr: 0.000010 loss: 2.3733 (2.6313) time: 0.5237 data: 0.0002 max mem: 29510
Epoch: [6] [ 130/1251] eta: 0:10:26 lr: 0.000010 loss: 2.3733 (2.6237) time: 0.5207 data: 0.0002 max mem: 29510
Epoch: [6] [ 140/1251] eta: 0:10:19 lr: 0.000010 loss: 2.5156 (2.6112) time: 0.5274 data: 0.0002 max mem: 29510
Epoch: [6] [ 150/1251] eta: 0:10:11 lr: 0.000010 loss: 2.5943 (2.6161) time: 0.5305 data: 0.0002 max mem: 29510
Epoch: [6] [ 160/1251] eta: 0:10:04 lr: 0.000010 loss: 2.6504 (2.6236) time: 0.5304 data: 0.0002 max mem: 29510
Epoch: [6] [ 170/1251] eta: 0:09:56 lr: 0.000010 loss: 2.7166 (2.6308) time: 0.5260 data: 0.0002 max mem: 29510
Epoch: [6] [ 180/1251] eta: 0:09:49 lr: 0.000010 loss: 2.7120 (2.6401) time: 0.5225 data: 0.0003 max mem: 29510
Epoch: [6] [ 190/1251] eta: 0:09:42 lr: 0.000010 loss: 2.7673 (2.6469) time: 0.5239 data: 0.0002 max mem: 29510
Epoch: [6] [ 200/1251] eta: 0:09:36 lr: 0.000010 loss: 2.7243 (2.6427) time: 0.5285 data: 0.0002 max mem: 29510
Epoch: [6] [ 210/1251] eta: 0:09:29 lr: 0.000010 loss: 2.6489 (2.6531) time: 0.5275 data: 0.0003 max mem: 29510
Epoch: [6] [ 220/1251] eta: 0:09:23 lr: 0.000010 loss: 2.7873 (2.6505) time: 0.5256 data: 0.0002 max mem: 29510
Epoch: [6] [ 230/1251] eta: 0:09:16 lr: 0.000010 loss: 2.7873 (2.6575) time: 0.5277 data: 0.0002 max mem: 29510
Epoch: [6] [ 240/1251] eta: 0:09:10 lr: 0.000010 loss: 2.7146 (2.6539) time: 0.5280 data: 0.0002 max mem: 29510
Epoch: [6] [ 250/1251] eta: 0:09:04 lr: 0.000010 loss: 2.7685 (2.6586) time: 0.5244 data: 0.0002 max mem: 29510
Epoch: [6] [ 260/1251] eta: 0:08:57 lr: 0.000010 loss: 2.8399 (2.6631) time: 0.5195 data: 0.0002 max mem: 29510
Epoch: [6] [ 270/1251] eta: 0:08:52 lr: 0.000010 loss: 2.7949 (2.6614) time: 0.5262 data: 0.0002 max mem: 29510
Epoch: [6] [ 280/1251] eta: 0:08:46 lr: 0.000010 loss: 2.6747 (2.6578) time: 0.5299 data: 0.0002 max mem: 29510
Epoch: [6] [ 290/1251] eta: 0:08:40 lr: 0.000010 loss: 2.7958 (2.6630) time: 0.5278 data: 0.0003 max mem: 29510
Epoch: [6] [ 300/1251] eta: 0:08:34 lr: 0.000010 loss: 2.8962 (2.6703) time: 0.5235 data: 0.0002 max mem: 29510
Epoch: [6] [ 310/1251] eta: 0:08:28 lr: 0.000010 loss: 2.8962 (2.6663) time: 0.5243 data: 0.0002 max mem: 29510
Epoch: [6] [ 320/1251] eta: 0:08:22 lr: 0.000010 loss: 2.7767 (2.6696) time: 0.5289 data: 0.0002 max mem: 29510
Epoch: [6] [ 330/1251] eta: 0:08:16 lr: 0.000010 loss: 2.7677 (2.6654) time: 0.5289 data: 0.0002 max mem: 29510
Epoch: [6] [ 340/1251] eta: 0:08:11 lr: 0.000010 loss: 2.6894 (2.6686) time: 0.5257 data: 0.0002 max mem: 29510
Epoch: [6] [ 350/1251] eta: 0:08:05 lr: 0.000010 loss: 2.6894 (2.6659) time: 0.5229 data: 0.0003 max mem: 29510
Epoch: [6] [ 360/1251] eta: 0:07:59 lr: 0.000010 loss: 2.5601 (2.6587) time: 0.5236 data: 0.0003 max mem: 29510
Epoch: [6] [ 370/1251] eta: 0:07:53 lr: 0.000010 loss: 2.5548 (2.6583) time: 0.5258 data: 0.0002 max mem: 29510
Epoch: [6] [ 380/1251] eta: 0:07:48 lr: 0.000010 loss: 2.5431 (2.6550) time: 0.5272 data: 0.0002 max mem: 29510
Epoch: [6] [ 390/1251] eta: 0:07:42 lr: 0.000010 loss: 2.6136 (2.6586) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [6] [ 400/1251] eta: 0:07:36 lr: 0.000010 loss: 2.8156 (2.6620) time: 0.5190 data: 0.0002 max mem: 29510
Epoch: [6] [ 410/1251] eta: 0:07:31 lr: 0.000010 loss: 2.6530 (2.6604) time: 0.5222 data: 0.0002 max mem: 29510
Epoch: [6] [ 420/1251] eta: 0:07:25 lr: 0.000010 loss: 2.6530 (2.6618) time: 0.5240 data: 0.0003 max mem: 29510
Epoch: [6] [ 430/1251] eta: 0:07:19 lr: 0.000010 loss: 2.8116 (2.6651) time: 0.5241 data: 0.0002 max mem: 29510
Epoch: [6] [ 440/1251] eta: 0:07:14 lr: 0.000010 loss: 2.8801 (2.6666) time: 0.5251 data: 0.0002 max mem: 29510
Epoch: [6] [ 450/1251] eta: 0:07:08 lr: 0.000010 loss: 2.8801 (2.6670) time: 0.5277 data: 0.0002 max mem: 29510
Epoch: [6] [ 460/1251] eta: 0:07:03 lr: 0.000010 loss: 2.9455 (2.6699) time: 0.5257 data: 0.0002 max mem: 29510
Epoch: [6] [ 470/1251] eta: 0:06:57 lr: 0.000010 loss: 2.9559 (2.6657) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [6] [ 480/1251] eta: 0:06:52 lr: 0.000010 loss: 2.8247 (2.6704) time: 0.5272 data: 0.0002 max mem: 29510
Epoch: [6] [ 490/1251] eta: 0:06:46 lr: 0.000010 loss: 2.7759 (2.6672) time: 0.5294 data: 0.0002 max mem: 29510
Epoch: [6] [ 500/1251] eta: 0:06:41 lr: 0.000010 loss: 2.6744 (2.6713) time: 0.5290 data: 0.0002 max mem: 29510
Epoch: [6] [ 510/1251] eta: 0:06:35 lr: 0.000010 loss: 2.8921 (2.6752) time: 0.5238 data: 0.0003 max mem: 29510
Epoch: [6] [ 520/1251] eta: 0:06:30 lr: 0.000010 loss: 2.8159 (2.6727) time: 0.5220 data: 0.0003 max mem: 29510
Epoch: [6] [ 530/1251] eta: 0:06:25 lr: 0.000010 loss: 2.6005 (2.6743) time: 0.5274 data: 0.0002 max mem: 29510
Epoch: [6] [ 540/1251] eta: 0:06:19 lr: 0.000010 loss: 2.8998 (2.6781) time: 0.5292 data: 0.0002 max mem: 29510
Epoch: [6] [ 550/1251] eta: 0:06:14 lr: 0.000010 loss: 2.7607 (2.6777) time: 0.5273 data: 0.0002 max mem: 29510
Epoch: [6] [ 560/1251] eta: 0:06:08 lr: 0.000010 loss: 2.6553 (2.6784) time: 0.5274 data: 0.0002 max mem: 29510
Epoch: [6] [ 570/1251] eta: 0:06:03 lr: 0.000010 loss: 2.6162 (2.6788) time: 0.5262 data: 0.0002 max mem: 29510
Epoch: [6] [ 580/1251] eta: 0:05:57 lr: 0.000010 loss: 2.5730 (2.6755) time: 0.5229 data: 0.0002 max mem: 29510
Epoch: [6] [ 590/1251] eta: 0:05:52 lr: 0.000010 loss: 2.5690 (2.6750) time: 0.5232 data: 0.0002 max mem: 29510
Epoch: [6] [ 600/1251] eta: 0:05:46 lr: 0.000010 loss: 2.6716 (2.6755) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [6] [ 610/1251] eta: 0:05:41 lr: 0.000010 loss: 2.6716 (2.6737) time: 0.5234 data: 0.0002 max mem: 29510
Epoch: [6] [ 620/1251] eta: 0:05:36 lr: 0.000010 loss: 2.8766 (2.6774) time: 0.5266 data: 0.0002 max mem: 29510
Epoch: [6] [ 630/1251] eta: 0:05:30 lr: 0.000010 loss: 2.8759 (2.6796) time: 0.5290 data: 0.0002 max mem: 29510
Epoch: [6] [ 640/1251] eta: 0:05:25 lr: 0.000010 loss: 2.7951 (2.6791) time: 0.5279 data: 0.0002 max mem: 29510
Epoch: [6] [ 650/1251] eta: 0:05:19 lr: 0.000010 loss: 2.8287 (2.6814) time: 0.5221 data: 0.0002 max mem: 29510
Epoch: [6] [ 660/1251] eta: 0:05:14 lr: 0.000010 loss: 2.8060 (2.6820) time: 0.5228 data: 0.0002 max mem: 29510
Epoch: [6] [ 670/1251] eta: 0:05:09 lr: 0.000010 loss: 2.8060 (2.6826) time: 0.5225 data: 0.0002 max mem: 29510
Epoch: [6] [ 680/1251] eta: 0:05:03 lr: 0.000010 loss: 2.8665 (2.6836) time: 0.5207 data: 0.0002 max mem: 29510
Epoch: [6] [ 690/1251] eta: 0:04:58 lr: 0.000010 loss: 2.8313 (2.6844) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [6] [ 700/1251] eta: 0:04:53 lr: 0.000010 loss: 2.8935 (2.6852) time: 0.5289 data: 0.0003 max mem: 29510
Epoch: [6] [ 710/1251] eta: 0:04:47 lr: 0.000010 loss: 2.7702 (2.6825) time: 0.5303 data: 0.0002 max mem: 29510
Epoch: [6] [ 720/1251] eta: 0:04:42 lr: 0.000010 loss: 2.7359 (2.6834) time: 0.5271 data: 0.0002 max mem: 29510
Epoch: [6] [ 730/1251] eta: 0:04:37 lr: 0.000010 loss: 2.8700 (2.6849) time: 0.5254 data: 0.0002 max mem: 29510
Epoch: [6] [ 740/1251] eta: 0:04:31 lr: 0.000010 loss: 2.8382 (2.6836) time: 0.5223 data: 0.0002 max mem: 29510
Epoch: [6] [ 750/1251] eta: 0:04:26 lr: 0.000010 loss: 2.7409 (2.6848) time: 0.5249 data: 0.0002 max mem: 29510
Epoch: [6] [ 760/1251] eta: 0:04:20 lr: 0.000010 loss: 2.8011 (2.6864) time: 0.5288 data: 0.0003 max mem: 29510
Epoch: [6] [ 770/1251] eta: 0:04:15 lr: 0.000010 loss: 2.8249 (2.6870) time: 0.5250 data: 0.0002 max mem: 29510
Epoch: [6] [ 780/1251] eta: 0:04:10 lr: 0.000010 loss: 2.4607 (2.6831) time: 0.5212 data: 0.0002 max mem: 29510
Epoch: [6] [ 790/1251] eta: 0:04:04 lr: 0.000010 loss: 2.3261 (2.6807) time: 0.5196 data: 0.0002 max mem: 29510
Epoch: [6] [ 800/1251] eta: 0:03:59 lr: 0.000010 loss: 2.3261 (2.6781) time: 0.5184 data: 0.0002 max mem: 29510
Epoch: [6] [ 810/1251] eta: 0:03:54 lr: 0.000010 loss: 2.7712 (2.6794) time: 0.5169 data: 0.0002 max mem: 29510
Epoch: [6] [ 820/1251] eta: 0:03:48 lr: 0.000010 loss: 2.8440 (2.6794) time: 0.5178 data: 0.0002 max mem: 29510
Epoch: [6] [ 830/1251] eta: 0:03:43 lr: 0.000010 loss: 2.7487 (2.6805) time: 0.5261 data: 0.0002 max mem: 29510
Epoch: [6] [ 840/1251] eta: 0:03:37 lr: 0.000010 loss: 2.9052 (2.6832) time: 0.5238 data: 0.0002 max mem: 29510
Epoch: [6] [ 850/1251] eta: 0:03:32 lr: 0.000010 loss: 2.5890 (2.6813) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [6] [ 860/1251] eta: 0:03:27 lr: 0.000010 loss: 2.5890 (2.6819) time: 0.5259 data: 0.0002 max mem: 29510
Epoch: [6] [ 870/1251] eta: 0:03:22 lr: 0.000010 loss: 2.7634 (2.6817) time: 0.5243 data: 0.0002 max mem: 29510
Epoch: [6] [ 880/1251] eta: 0:03:16 lr: 0.000010 loss: 2.6985 (2.6814) time: 0.5226 data: 0.0002 max mem: 29510
Epoch: [6] [ 890/1251] eta: 0:03:11 lr: 0.000010 loss: 2.7973 (2.6821) time: 0.5252 data: 0.0002 max mem: 29510
Epoch: [6] [ 900/1251] eta: 0:03:06 lr: 0.000010 loss: 2.7973 (2.6819) time: 0.5281 data: 0.0002 max mem: 29510
Epoch: [6] [ 910/1251] eta: 0:03:00 lr: 0.000010 loss: 2.6758 (2.6811) time: 0.5274 data: 0.0002 max mem: 29510
Epoch: [6] [ 920/1251] eta: 0:02:55 lr: 0.000010 loss: 2.7086 (2.6827) time: 0.5255 data: 0.0002 max mem: 29510
Epoch: [6] [ 930/1251] eta: 0:02:50 lr: 0.000010 loss: 2.7291 (2.6822) time: 0.5243 data: 0.0002 max mem: 29510
Epoch: [6] [ 940/1251] eta: 0:02:44 lr: 0.000010 loss: 2.5534 (2.6807) time: 0.5262 data: 0.0002 max mem: 29510
Epoch: [6] [ 950/1251] eta: 0:02:39 lr: 0.000010 loss: 2.6014 (2.6810) time: 0.5269 data: 0.0002 max mem: 29510
Epoch: [6] [ 960/1251] eta: 0:02:34 lr: 0.000010 loss: 2.8172 (2.6821) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [6] [ 970/1251] eta: 0:02:28 lr: 0.000010 loss: 2.7594 (2.6812) time: 0.5218 data: 0.0002 max mem: 29510
Epoch: [6] [ 980/1251] eta: 0:02:23 lr: 0.000010 loss: 2.8702 (2.6823) time: 0.5235 data: 0.0002 max mem: 29510
Epoch: [6] [ 990/1251] eta: 0:02:18 lr: 0.000010 loss: 2.7961 (2.6818) time: 0.5245 data: 0.0002 max mem: 29510
Epoch: [6] [1000/1251] eta: 0:02:12 lr: 0.000010 loss: 2.7961 (2.6827) time: 0.5230 data: 0.0002 max mem: 29510
Epoch: [6] [1010/1251] eta: 0:02:07 lr: 0.000010 loss: 2.8788 (2.6823) time: 0.5218 data: 0.0002 max mem: 29510
Epoch: [6] [1020/1251] eta: 0:02:02 lr: 0.000010 loss: 2.7588 (2.6826) time: 0.5231 data: 0.0003 max mem: 29510
Epoch: [6] [1030/1251] eta: 0:01:56 lr: 0.000010 loss: 2.7611 (2.6833) time: 0.5196 data: 0.0003 max mem: 29510
Epoch: [6] [1040/1251] eta: 0:01:51 lr: 0.000010 loss: 2.7611 (2.6827) time: 0.5146 data: 0.0002 max mem: 29510
Epoch: [6] [1050/1251] eta: 0:01:46 lr: 0.000010 loss: 2.6169 (2.6824) time: 0.5181 data: 0.0002 max mem: 29510
Epoch: [6] [1060/1251] eta: 0:01:41 lr: 0.000010 loss: 2.5904 (2.6820) time: 0.5251 data: 0.0002 max mem: 29510
Epoch: [6] [1070/1251] eta: 0:01:35 lr: 0.000010 loss: 2.7510 (2.6829) time: 0.5270 data: 0.0002 max mem: 29510
Epoch: [6] [1080/1251] eta: 0:01:30 lr: 0.000010 loss: 2.8405 (2.6831) time: 0.5226 data: 0.0002 max mem: 29510
Epoch: [6] [1090/1251] eta: 0:01:25 lr: 0.000010 loss: 2.8347 (2.6838) time: 0.5211 data: 0.0002 max mem: 29510
Epoch: [6] [1100/1251] eta: 0:01:19 lr: 0.000010 loss: 2.7599 (2.6845) time: 0.5194 data: 0.0002 max mem: 29510
Epoch: [6] [1110/1251] eta: 0:01:14 lr: 0.000010 loss: 2.7179 (2.6853) time: 0.5216 data: 0.0002 max mem: 29510
Epoch: [6] [1120/1251] eta: 0:01:09 lr: 0.000010 loss: 2.7906 (2.6873) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [6] [1130/1251] eta: 0:01:03 lr: 0.000010 loss: 2.7696 (2.6861) time: 0.5154 data: 0.0002 max mem: 29510
Epoch: [6] [1140/1251] eta: 0:00:58 lr: 0.000010 loss: 2.6447 (2.6864) time: 0.5187 data: 0.0002 max mem: 29510
Epoch: [6] [1150/1251] eta: 0:00:53 lr: 0.000010 loss: 2.7552 (2.6869) time: 0.5142 data: 0.0002 max mem: 29510
Epoch: [6] [1160/1251] eta: 0:00:48 lr: 0.000010 loss: 2.8324 (2.6868) time: 0.5167 data: 0.0002 max mem: 29510
Epoch: [6] [1170/1251] eta: 0:00:42 lr: 0.000010 loss: 2.6814 (2.6863) time: 0.5245 data: 0.0002 max mem: 29510
Epoch: [6] [1180/1251] eta: 0:00:37 lr: 0.000010 loss: 2.8098 (2.6875) time: 0.5257 data: 0.0002 max mem: 29510
Epoch: [6] [1190/1251] eta: 0:00:32 lr: 0.000010 loss: 2.8469 (2.6884) time: 0.5204 data: 0.0002 max mem: 29510
Epoch: [6] [1200/1251] eta: 0:00:26 lr: 0.000010 loss: 2.7799 (2.6892) time: 0.5150 data: 0.0002 max mem: 29510
Epoch: [6] [1210/1251] eta: 0:00:21 lr: 0.000010 loss: 2.7799 (2.6884) time: 0.5188 data: 0.0003 max mem: 29510
Epoch: [6] [1220/1251] eta: 0:00:16 lr: 0.000010 loss: 2.7908 (2.6891) time: 0.5184 data: 0.0003 max mem: 29510
Epoch: [6] [1230/1251] eta: 0:00:11 lr: 0.000010 loss: 2.7627 (2.6890) time: 0.5261 data: 0.0002 max mem: 29510
Epoch: [6] [1240/1251] eta: 0:00:05 lr: 0.000010 loss: 2.5590 (2.6871) time: 0.5145 data: 0.0012 max mem: 29510
Epoch: [6] [1250/1251] eta: 0:00:00 lr: 0.000010 loss: 2.6838 (2.6878) time: 0.4927 data: 0.0011 max mem: 29510
Epoch: [6] Total time: 0:11:00 (0.5278 s / it)
Averaged stats: lr: 0.000010 loss: 2.6838 (2.6819)
Test: [ 0/25] eta: 0:03:11 loss: 0.6949 (0.6949) acc1: 90.8000 (90.8000) acc5: 98.8000 (98.8000) time: 7.6552 data: 7.3645 max mem: 29510
Test: [10/25] eta: 0:00:14 loss: 0.8495 (0.8373) acc1: 86.8000 (86.8000) acc5: 98.0000 (98.0727) time: 0.9979 data: 0.6698 max mem: 29510
Test: [20/25] eta: 0:00:03 loss: 0.9650 (0.9590) acc1: 82.0000 (83.4095) acc5: 96.8000 (96.8762) time: 0.3226 data: 0.0002 max mem: 29510
Test: [24/25] eta: 0:00:00 loss: 1.0272 (0.9677) acc1: 81.2000 (82.9920) acc5: 96.8000 (96.8800) time: 0.3120 data: 0.0002 max mem: 29510
Test: Total time: 0:00:15 (0.6244 s / it)
* Acc@1 83.390 Acc@5 96.798 loss 0.962
Accuracy of the network on the 50000 test images: 83.4%
Max accuracy: 83.42%
Epoch: [7] [ 0/1251] eta: 1:38:22 lr: 0.000010 loss: 3.0935 (3.0935) time: 4.7179 data: 3.8464 max mem: 29510
Epoch: [7] [ 10/1251] eta: 0:19:04 lr: 0.000010 loss: 2.4570 (2.4156) time: 0.9224 data: 0.3499 max mem: 29510
Epoch: [7] [ 20/1251] eta: 0:14:59 lr: 0.000010 loss: 2.4724 (2.4649) time: 0.5315 data: 0.0002 max mem: 29510
Epoch: [7] [ 30/1251] eta: 0:13:29 lr: 0.000010 loss: 2.5220 (2.4762) time: 0.5197 data: 0.0002 max mem: 29510
Epoch: [7] [ 40/1251] eta: 0:12:39 lr: 0.000010 loss: 2.6260 (2.5425) time: 0.5191 data: 0.0002 max mem: 29510
Epoch: [7] [ 50/1251] eta: 0:12:10 lr: 0.000010 loss: 2.8884 (2.5731) time: 0.5240 data: 0.0002 max mem: 29510
Epoch: [7] [ 60/1251] eta: 0:11:51 lr: 0.000010 loss: 2.6745 (2.5887) time: 0.5344 data: 0.0002 max mem: 29510
Epoch: [7] [ 70/1251] eta: 0:11:31 lr: 0.000010 loss: 2.8350 (2.6288) time: 0.5283 data: 0.0002 max mem: 29510
Epoch: [7] [ 80/1251] eta: 0:11:17 lr: 0.000010 loss: 2.8478 (2.6467) time: 0.5213 data: 0.0002 max mem: 29510
Epoch: [7] [ 90/1251] eta: 0:11:04 lr: 0.000010 loss: 2.6650 (2.6170) time: 0.5247 data: 0.0002 max mem: 29510
Epoch: [7] [ 100/1251] eta: 0:10:52 lr: 0.000010 loss: 2.4668 (2.6234) time: 0.5215 data: 0.0002 max mem: 29510
Epoch: [7] [ 110/1251] eta: 0:10:43 lr: 0.000010 loss: 2.8079 (2.6305) time: 0.5263 data: 0.0002 max mem: 29510
Epoch: [7] [ 120/1251] eta: 0:10:34 lr: 0.000010 loss: 2.7585 (2.6302) time: 0.5278 data: 0.0002 max mem: 29510
Epoch: [7] [ 130/1251] eta: 0:10:25 lr: 0.000010 loss: 2.7368 (2.6392) time: 0.5252 data: 0.0002 max mem: 29510
Epoch: [7] [ 140/1251] eta: 0:10:17 lr: 0.000010 loss: 2.6375 (2.6213) time: 0.5277 data: 0.0002 max mem: 29510
Epoch: [7] [ 150/1251] eta: 0:10:09 lr: 0.000010 loss: 2.6446 (2.6359) time: 0.5266 data: 0.0003 max mem: 29510
Epoch: [7] [ 160/1251] eta: 0:10:02 lr: 0.000010 loss: 2.8620 (2.6342) time: 0.5289 data: 0.0003 max mem: 29510
Epoch: [7] [ 170/1251] eta: 0:09:55 lr: 0.000010 loss: 2.6119 (2.6377) time: 0.5281 data: 0.0002 max mem: 29510
Epoch: [7] [ 180/1251] eta: 0:09:48 lr: 0.000010 loss: 2.6793 (2.6339) time: 0.5208 data: 0.0002 max mem: 29510
Epoch: [7] [ 190/1251] eta: 0:09:40 lr: 0.000010 loss: 2.7445 (2.6389) time: 0.5184 data: 0.0002 max mem: 29510
Epoch: [7] [ 200/1251] eta: 0:09:34 lr: 0.000010 loss: 2.8136 (2.6510) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [7] [ 210/1251] eta: 0:09:27 lr: 0.000010 loss: 2.8099 (2.6489) time: 0.5263 data: 0.0002 max mem: 29510
Epoch: [7] [ 220/1251] eta: 0:09:21 lr: 0.000010 loss: 2.7494 (2.6543) time: 0.5277 data: 0.0002 max mem: 29510
Epoch: [7] [ 230/1251] eta: 0:09:15 lr: 0.000010 loss: 2.7494 (2.6556) time: 0.5288 data: 0.0002 max mem: 29510
Epoch: [7] [ 240/1251] eta: 0:09:09 lr: 0.000010 loss: 2.6422 (2.6584) time: 0.5261 data: 0.0003 max mem: 29510
Epoch: [7] [ 250/1251] eta: 0:09:03 lr: 0.000010 loss: 2.8423 (2.6678) time: 0.5246 data: 0.0002 max mem: 29510
Epoch: [7] [ 260/1251] eta: 0:08:57 lr: 0.000010 loss: 2.8423 (2.6707) time: 0.5282 data: 0.0002 max mem: 29510
Epoch: [7] [ 270/1251] eta: 0:08:51 lr: 0.000010 loss: 2.7885 (2.6692) time: 0.5289 data: 0.0002 max mem: 29510
Epoch: [7] [ 280/1251] eta: 0:08:45 lr: 0.000010 loss: 2.4823 (2.6606) time: 0.5226 data: 0.0002 max mem: 29510
Epoch: [7] [ 290/1251] eta: 0:08:39 lr: 0.000010 loss: 2.5217 (2.6579) time: 0.5236 data: 0.0002 max mem: 29510
Epoch: [7] [ 300/1251] eta: 0:08:33 lr: 0.000010 loss: 2.6857 (2.6545) time: 0.5261 data: 0.0002 max mem: 29510
Epoch: [7] [ 310/1251] eta: 0:08:27 lr: 0.000010 loss: 2.7842 (2.6568) time: 0.5207 data: 0.0002 max mem: 29510
Epoch: [7] [ 320/1251] eta: 0:08:21 lr: 0.000010 loss: 2.8240 (2.6642) time: 0.5235 data: 0.0003 max mem: 29510
Epoch: [7] [ 330/1251] eta: 0:08:15 lr: 0.000010 loss: 2.8240 (2.6678) time: 0.5282 data: 0.0003 max mem: 29510
Epoch: [7] [ 340/1251] eta: 0:08:10 lr: 0.000010 loss: 2.7749 (2.6671) time: 0.5275 data: 0.0002 max mem: 29510
Epoch: [7] [ 350/1251] eta: 0:08:04 lr: 0.000010 loss: 2.6701 (2.6670) time: 0.5291 data: 0.0002 max mem: 29510
Epoch: [7] [ 360/1251] eta: 0:07:58 lr: 0.000010 loss: 2.7471 (2.6657) time: 0.5268 data: 0.0002 max mem: 29510
Epoch: [7] [ 370/1251] eta: 0:07:53 lr: 0.000010 loss: 2.7471 (2.6685) time: 0.5204 data: 0.0002 max mem: 29510
Epoch: [7] [ 380/1251] eta: 0:07:47 lr: 0.000010 loss: 2.7815 (2.6701) time: 0.5253 data: 0.0002 max mem: 29510
Epoch: [7] [ 390/1251] eta: 0:07:41 lr: 0.000010 loss: 2.8209 (2.6747) time: 0.5268 data: 0.0002 max mem: 29510
Epoch: [7] [ 400/1251] eta: 0:07:36 lr: 0.000010 loss: 2.8209 (2.6715) time: 0.5232 data: 0.0003 max mem: 29510
Epoch: [7] [ 410/1251] eta: 0:07:30 lr: 0.000010 loss: 2.7954 (2.6727) time: 0.5264 data: 0.0002 max mem: 29510
Epoch: [7] [ 420/1251] eta: 0:07:25 lr: 0.000010 loss: 2.8139 (2.6752) time: 0.5248 data: 0.0002 max mem: 29510
Epoch: [7] [ 430/1251] eta: 0:07:19 lr: 0.000010 loss: 2.7072 (2.6744) time: 0.5261 data: 0.0002 max mem: 29510
Epoch: [7] [ 440/1251] eta: 0:07:14 lr: 0.000010 loss: 2.5959 (2.6685) time: 0.5274 data: 0.0002 max mem: 29510
Epoch: [7] [ 450/1251] eta: 0:07:08 lr: 0.000010 loss: 2.5821 (2.6671) time: 0.5233 data: 0.0002 max mem: 29510
Epoch: [7] [ 460/1251] eta: 0:07:02 lr: 0.000010 loss: 2.5592 (2.6621) time: 0.5239 data: 0.0002 max mem: 29510
Epoch: [7] [ 470/1251] eta: 0:06:57 lr: 0.000010 loss: 2.6124 (2.6637) time: 0.5276 data: 0.0002 max mem: 29510