{"lr": 1.2811502516175406e-05, "data_time": 0.002213573455810547, "grad_norm": 27.62863235473633, "loss": 8.982386207580566, "time": 1.9190522193908692, "epoch": 1, "memory": 36824, "step": 100}
{"lr": 2.559079798705962e-05, "data_time": 0.0021619796752929688, "grad_norm": 24.264714622497557, "loss": 7.532471704483032, "time": 1.993946361541748, "epoch": 1, "memory": 36824, "step": 200}
{"lr": 3.837009345794386e-05, "data_time": 0.002806949615478516, "grad_norm": 17.915128707885742, "loss": 4.989801406860352, "time": 1.9153135538101196, "epoch": 1, "memory": 36824, "step": 300}
{"lr": 5.281069734004309e-05, "data_time": 0.260884952545166, "grad_norm": 6.640846157073975, "loss": 2.409209442138672, "time": 1.9285697937011719, "epoch": 2, "memory": 36824, "step": 413}
{"lr": 6.55899928109273e-05, "data_time": 0.0067786693572998045, "grad_norm": 1.7788047671318055, "loss": 1.3876521110534668, "time": 1.9758976936340331, "epoch": 2, "memory": 36824, "step": 513}
{"lr": 7.836928828181158e-05, "data_time": 0.002939748764038086, "grad_norm": 0.4235467344522476, "loss": 1.0743067264556885, "time": 1.9510942697525024, "epoch": 2, "memory": 36824, "step": 613}
{"lr": 9.280989216391067e-05, "data_time": 0.3869504928588867, "grad_norm": 0.2063712164759636, "loss": 0.9835618078708649, "time": 1.9262565851211548, "epoch": 3, "memory": 36824, "step": 726}
{"lr": 0.00010558918763479485, "data_time": 0.003335380554199219, "grad_norm": 0.21891210973262787, "loss": 0.8818928241729737, "time": 1.914995551109314, "epoch": 3, "memory": 36824, "step": 826}
{"lr": 0.00011836848310567904, "data_time": 0.00245358943939209, "grad_norm": 0.13298725113272666, "loss": 0.825363963842392, "time": 1.9195087194442748, "epoch": 3, "memory": 36824, "step": 926}
{"lr": 0.0001328090869877782, "data_time": 0.19276697635650636, "grad_norm": 0.13208091855049134, "loss": 0.793513947725296, "time": 1.9102522373199462, "epoch": 4, "memory": 36824, "step": 1039}
{"lr": 0.00014558838245866228, "data_time": 0.002361011505126953, "grad_norm": 0.1351453945040703, "loss": 0.7522620618343353, "time": 1.9374320030212402, "epoch": 4, "memory": 36824, "step": 1139}
{"lr": 0.00015836767792954656, "data_time": 0.0031463146209716798, "grad_norm": 0.14456119909882545, "loss": 0.7200703144073486, "time": 1.9459431409835815, "epoch": 4, "memory": 36824, "step": 1239}
{"lr": 0.00017280828181164573, "data_time": 0.22194192409515381, "grad_norm": 0.1751767322421074, "loss": 0.6833173036575317, "time": 1.932199192047119, "epoch": 5, "memory": 36824, "step": 1352}
{"lr": 0.00018558757728252983, "data_time": 0.0020207881927490233, "grad_norm": 0.21709752678871155, "loss": 0.6621390521526337, "time": 1.897406816482544, "epoch": 5, "memory": 36824, "step": 1452}
{"lr": 0.00019836687275341417, "data_time": 0.003476715087890625, "grad_norm": 0.16465356647968293, "loss": 0.6468756258487701, "time": 1.8945335865020752, "epoch": 5, "memory": 36824, "step": 1552}
{"lr": 0.00021280747663551366, "data_time": 0.0018905401229858398, "grad_norm": 0.2253955587744713, "loss": 0.634823614358902, "time": 1.8966960668563844, "epoch": 6, "memory": 36824, "step": 1665}
{"lr": 0.0002255867721063979, "data_time": 0.002083277702331543, "grad_norm": 0.23913377225399018, "loss": 0.6273575901985169, "time": 1.9517154216766357, "epoch": 6, "memory": 36824, "step": 1765}
{"lr": 0.00023836606757728201, "data_time": 0.003179192543029785, "grad_norm": 0.2248229503631592, "loss": 0.6190088510513305, "time": 1.899333667755127, "epoch": 6, "memory": 36824, "step": 1865}
{"lr": 0.00025280667145938127, "data_time": 0.3045757055282593, "grad_norm": 0.2602144360542297, "loss": 0.614613288640976, "time": 1.9075058460235597, "epoch": 7, "memory": 36824, "step": 1978}
{"lr": 0.00026558596693026604, "data_time": 0.0021629095077514648, "grad_norm": 0.29127443879842757, "loss": 0.6066873490810394, "time": 1.9139214277267456, "epoch": 7, "memory": 36824, "step": 2078}
{"lr": 0.0002783652624011506, "data_time": 0.0029099225997924806, "grad_norm": 0.28026892691850663, "loss": 0.5975041449069977, "time": 1.9670852661132812, "epoch": 7, "memory": 36824, "step": 2178}
{"lr": 0.0002928058662832502, "data_time": 0.03581528663635254, "grad_norm": 0.2952950268983841, "loss": 0.5911590456962585, "time": 1.9447311162948608, "epoch": 8, "memory": 36824, "step": 2291}
{"lr": 0.0003055851617541349, "data_time": 0.0021836280822753905, "grad_norm": 0.2936515584588051, "loss": 0.5872137486934662, "time": 1.9682714462280273, "epoch": 8, "memory": 36824, "step": 2391}
{"lr": 0.0003183644572250199, "data_time": 0.0032678842544555664, "grad_norm": 0.3123120069503784, "loss": 0.5871114194393158, "time": 1.911038875579834, "epoch": 8, "memory": 36824, "step": 2491}
{"lr": 0.00033280506110712005, "data_time": 0.20908262729644775, "grad_norm": 0.32384340167045594, "loss": 0.5828545808792114, "time": 1.9000826597213745, "epoch": 9, "memory": 36824, "step": 2604}
{"lr": 0.000345584356578005, "data_time": 0.0027029991149902345, "grad_norm": 0.29279257655143737, "loss": 0.5802187204360962, "time": 1.9504616975784301, "epoch": 9, "memory": 36824, "step": 2704}
{"lr": 0.00035836365204888965, "data_time": 0.0025415897369384767, "grad_norm": 0.2918189734220505, "loss": 0.5758034408092498, "time": 1.8799091577529907, "epoch": 9, "memory": 36824, "step": 2804}
{"lr": 0.0003728042559309897, "data_time": 0.2960104465484619, "grad_norm": 0.30403105169534683, "loss": 0.5764218986034393, "time": 1.954623246192932, "epoch": 10, "memory": 36824, "step": 2917}
{"lr": 0.0003855835514018751, "data_time": 0.0020570993423461915, "grad_norm": 0.2963351055979729, "loss": 0.5713632225990295, "time": 1.9120681285858154, "epoch": 10, "memory": 36824, "step": 3017}
{"lr": 0.00039836284687275994, "data_time": 0.0025146007537841797, "grad_norm": 0.3011041671037674, "loss": 0.5724352240562439, "time": 1.9331496477127075, "epoch": 10, "memory": 36824, "step": 3117}
{"lr": 0.0004128034507548599, "data_time": 0.3122113227844238, "grad_norm": 0.3292048528790474, "loss": 0.5685434579849243, "time": 1.9497551679611207, "epoch": 11, "memory": 36824, "step": 3230}
{"lr": 0.0004255827462257451, "data_time": 0.0036681652069091796, "grad_norm": 0.3705091118812561, "loss": 0.5670882821083069, "time": 1.9542679071426392, "epoch": 11, "memory": 36824, "step": 3330}
{"lr": 0.00043836204169662963, "data_time": 0.004659271240234375, "grad_norm": 0.28464741706848146, "loss": 0.5635366618633271, "time": 1.8597558975219726, "epoch": 11, "memory": 36824, "step": 3430}
{"lr": 0.0004528026455787304, "data_time": 0.22196424007415771, "grad_norm": 0.3523053705692291, "loss": 0.565786612033844, "time": 1.963792634010315, "epoch": 12, "memory": 36824, "step": 3543}
{"lr": 0.00046558194104961477, "data_time": 0.0021514177322387697, "grad_norm": 0.32221158742904665, "loss": 0.5650140702724457, "time": 1.9388016223907472, "epoch": 12, "memory": 36824, "step": 3643}
{"lr": 0.0004783612365204992, "data_time": 0.0025253772735595705, "grad_norm": 0.3266288906335831, "loss": 0.5588069438934327, "time": 1.9355613708496093, "epoch": 12, "memory": 36824, "step": 3743}
{"lr": 0.0004928018404025987, "data_time": 0.42615671157836915, "grad_norm": 0.3166520088911057, "loss": 0.5546895802021027, "time": 1.9308817863464356, "epoch": 13, "memory": 36824, "step": 3856}
{"lr": 0.0005055811358734836, "data_time": 0.0023720264434814453, "grad_norm": 0.2929153829813004, "loss": 0.559228515625, "time": 1.8752539157867432, "epoch": 13, "memory": 36824, "step": 3956}
{"lr": 0.0005183604313443677, "data_time": 0.002434253692626953, "grad_norm": 0.3148412346839905, "loss": 0.5570441842079162, "time": 1.9231654167175294, "epoch": 13, "memory": 36824, "step": 4056}
{"lr": 0.0005328010352264666, "data_time": 0.18967766761779786, "grad_norm": 0.3218613237142563, "loss": 0.5480743706226349, "time": 2.0033138275146483, "epoch": 14, "memory": 36824, "step": 4169}
{"lr": 0.0005455803306973514, "data_time": 0.0022028207778930662, "grad_norm": 0.288123744726181, "loss": 0.5491047739982605, "time": 1.8702369451522827, "epoch": 14, "memory": 36824, "step": 4269}
{"lr": 0.0005583596261682361, "data_time": 0.004266762733459472, "grad_norm": 0.31962688118219373, "loss": 0.5489724397659301, "time": 1.9505786895751953, "epoch": 14, "memory": 36824, "step": 4369}
{"lr": 0.0005728002300503356, "data_time": 0.02403843402862549, "grad_norm": 0.31319957971572876, "loss": 0.5479123890399933, "time": 1.9311464548110961, "epoch": 15, "memory": 36824, "step": 4482}
{"lr": 0.0005855795255212198, "data_time": 0.0035125017166137695, "grad_norm": 0.3084017962217331, "loss": 0.5486978709697723, "time": 1.966028094291687, "epoch": 15, "memory": 36824, "step": 4582}
{"lr": 0.0005983588209921041, "data_time": 0.004432463645935058, "grad_norm": 0.30353135615587234, "loss": 0.5468325078487396, "time": 1.911041498184204, "epoch": 15, "memory": 36824, "step": 4682}
{"lr": 0.0006127994248742038, "data_time": 0.29197115898132325, "grad_norm": 0.3181016072630882, "loss": 0.5472312331199646, "time": 1.8648507356643678, "epoch": 16, "memory": 36824, "step": 4795}
{"lr": 0.0006255787203450881, "data_time": 0.0034905433654785155, "grad_norm": 0.24755016416311265, "loss": 0.5457603335380554, "time": 1.921761155128479, "epoch": 16, "memory": 36824, "step": 4895}
{"lr": 0.0006383580158159728, "data_time": 0.005305266380310059, "grad_norm": 0.2931198313832283, "loss": 0.5442632615566254, "time": 1.9108505964279174, "epoch": 16, "memory": 36824, "step": 4995}
{"lr": 0.0006527986196980722, "data_time": 0.42365944385528564, "grad_norm": 0.2734858438372612, "loss": 0.5440916538238525, "time": 1.8915522813796997, "epoch": 17, "memory": 36824, "step": 5108}
{"lr": 0.0006655779151689563, "data_time": 0.0025803327560424806, "grad_norm": 0.26806820333004, "loss": 0.5374817490577698, "time": 1.9532808780670166, "epoch": 17, "memory": 36824, "step": 5208}
{"lr": 0.0006783572106398399, "data_time": 0.003711366653442383, "grad_norm": 0.2847865238785744, "loss": 0.5439658641815186, "time": 1.873685646057129, "epoch": 17, "memory": 36824, "step": 5308}
{"lr": 0.0006927978145219388, "data_time": 0.35203430652618406, "grad_norm": 0.27529598474502565, "loss": 0.5402236998081207, "time": 1.9208575248718263, "epoch": 18, "memory": 36824, "step": 5421}
{"lr": 0.0007055771099928226, "data_time": 0.0022574901580810548, "grad_norm": 0.2905982494354248, "loss": 0.5408775091171265, "time": 1.89572594165802, "epoch": 18, "memory": 36824, "step": 5521}
{"lr": 0.0007183564054637058, "data_time": 0.002476215362548828, "grad_norm": 0.2975048184394836, "loss": 0.5399012506008148, "time": 1.9532732009887694, "epoch": 18, "memory": 36824, "step": 5621}
{"lr": 0.0007327970093458049, "data_time": 0.23833212852478028, "grad_norm": 0.2904518753290176, "loss": 0.5397711336612702, "time": 1.8957977056503297, "epoch": 19, "memory": 36824, "step": 5734}
{"lr": 0.0007455763048166896, "data_time": 0.0020581483840942383, "grad_norm": 0.2554729953408241, "loss": 0.5373795688152313, "time": 1.8794199466705321, "epoch": 19, "memory": 36824, "step": 5834}
{"lr": 0.0007583556002875734, "data_time": 0.003167414665222168, "grad_norm": 0.2922219097614288, "loss": 0.5325884401798249, "time": 1.8537330389022828, "epoch": 19, "memory": 36824, "step": 5934}
{"lr": 0.0007727962041696723, "data_time": 0.07636749744415283, "grad_norm": 0.2779721736907959, "loss": 0.5364069700241089, "time": 1.8781057596206665, "epoch": 20, "memory": 36824, "step": 6047}
{"lr": 0.0007855754996405561, "data_time": 0.002200746536254883, "grad_norm": 0.2906197875738144, "loss": 0.5374624490737915, "time": 1.8839591026306153, "epoch": 20, "memory": 36824, "step": 6147}
{"lr": 0.0007983547951114399, "data_time": 0.004567766189575195, "grad_norm": 0.2546485006809235, "loss": 0.5329257667064666, "time": 1.9440979480743408, "epoch": 20, "memory": 36824, "step": 6247}
{"lr": 0.00081279539899354, "data_time": 0.2257539987564087, "grad_norm": 0.2811274528503418, "loss": 0.535518330335617, "time": 1.8920392036437987, "epoch": 21, "memory": 36824, "step": 6360}
{"lr": 0.0008255746944644239, "data_time": 0.0031268835067749024, "grad_norm": 0.2787311479449272, "loss": 0.5328471422195434, "time": 1.9003742933273315, "epoch": 21, "memory": 36824, "step": 6460}
{"lr": 0.0008383539899353071, "data_time": 0.002291297912597656, "grad_norm": 0.26946958899497986, "loss": 0.531156724691391, "time": 1.88563072681427, "epoch": 21, "memory": 36824, "step": 6560}
{"lr": 0.000852794593817407, "data_time": 0.10678203105926513, "grad_norm": 0.25382933765649796, "loss": 0.5328520834445953, "time": 1.9278497219085693, "epoch": 22, "memory": 36824, "step": 6673}
{"lr": 0.0008655738892882905, "data_time": 0.0024092912673950194, "grad_norm": 0.255036835372448, "loss": 0.5299738883972168, "time": 1.8991880655288695, "epoch": 22, "memory": 36824, "step": 6773}
{"lr": 0.000878353184759175, "data_time": 0.002336931228637695, "grad_norm": 0.26047737449407576, "loss": 0.5302487909793854, "time": 1.8861654281616211, "epoch": 22, "memory": 36824, "step": 6873}
{"lr": 0.0008927937886412743, "data_time": 0.4053056001663208, "grad_norm": 0.292393285036087, "loss": 0.5267461359500885, "time": 1.8683598995208741, "epoch": 23, "memory": 36824, "step": 6986}
{"lr": 0.0009055730841121587, "data_time": 0.002031898498535156, "grad_norm": 0.26823126077651976, "loss": 0.5294264853000641, "time": 1.860534954071045, "epoch": 23, "memory": 36824, "step": 7086}
{"lr": 0.000918352379583043, "data_time": 0.0025576114654541015, "grad_norm": 0.3128362983465195, "loss": 0.531776773929596, "time": 1.9021053075790406, "epoch": 23, "memory": 36824, "step": 7186}
{"lr": 0.0009327929834651416, "data_time": 0.415441632270813, "grad_norm": 0.2595683947205544, "loss": 0.5284802913665771, "time": 1.887060546875, "epoch": 24, "memory": 36824, "step": 7299}
{"lr": 0.0009455722789360252, "data_time": 0.002247786521911621, "grad_norm": 0.25272706151008606, "loss": 0.5259660661220551, "time": 1.920330286026001, "epoch": 24, "memory": 36824, "step": 7399}
{"lr": 0.0009583515744069084, "data_time": 0.0024123430252075196, "grad_norm": 0.2719668298959732, "loss": 0.5267413020133972, "time": 1.8708943128585815, "epoch": 24, "memory": 36824, "step": 7499}
{"lr": 0.0009727921782890067, "data_time": 0.27077083587646483, "grad_norm": 0.2268507719039917, "loss": 0.5216831684112548, "time": 1.9228171586990357, "epoch": 25, "memory": 36824, "step": 7612}
{"lr": 0.0009855714737598915, "data_time": 0.002364969253540039, "grad_norm": 0.24243998676538467, "loss": 0.5231400549411773, "time": 1.92458233833313, "epoch": 25, "memory": 36824, "step": 7712}
{"lr": 0.000998350769230776, "data_time": 0.003154563903808594, "grad_norm": 0.26079669743776324, "loss": 0.5271035194396972, "time": 1.9305039882659911, "epoch": 25, "memory": 36824, "step": 7812}
{"lr": 0.0010127913731128753, "data_time": 0.4965403079986572, "grad_norm": 0.2751003921031952, "loss": 0.5257688939571381, "time": 1.884821081161499, "epoch": 26, "memory": 36824, "step": 7925}
{"lr": 0.0010255706685837588, "data_time": 0.10223352909088135, "grad_norm": 0.2702625945210457, "loss": 0.5235468506813049, "time": 1.913889741897583, "epoch": 26, "memory": 36824, "step": 8025}
{"lr": 0.001038349964054643, "data_time": 0.002408885955810547, "grad_norm": 0.2770656034350395, "loss": 0.5252753615379333, "time": 1.8497081041336059, "epoch": 26, "memory": 36824, "step": 8125}
{"lr": 0.001052790567936742, "data_time": 0.17569003105163575, "grad_norm": 0.22650285065174103, "loss": 0.518742847442627, "time": 1.9106640815734863, "epoch": 27, "memory": 36824, "step": 8238}
{"lr": 0.001065569863407626, "data_time": 0.0033079862594604494, "grad_norm": 0.26487717479467393, "loss": 0.5206781387329101, "time": 1.9185462474822998, "epoch": 27, "memory": 36824, "step": 8338}
{"lr": 0.0010783491588785082, "data_time": 0.0025029659271240236, "grad_norm": 0.22306870818138122, "loss": 0.5235767066478729, "time": 1.9107948064804077, "epoch": 27, "memory": 36824, "step": 8438}
{"lr": 0.0010927897627606084, "data_time": 0.2999915599822998, "grad_norm": 0.237992824614048, "loss": 0.5266128242015838, "time": 1.9399321794509887, "epoch": 28, "memory": 36824, "step": 8551}
{"lr": 0.0011055690582314978, "data_time": 0.0024714231491088866, "grad_norm": 0.2668233886361122, "loss": 0.5198746085166931, "time": 1.9177021741867066, "epoch": 28, "memory": 36824, "step": 8651}
{"lr": 0.0011183483537023828, "data_time": 0.003995513916015625, "grad_norm": 0.2786188393831253, "loss": 0.5223134100437165, "time": 1.9221089839935304, "epoch": 28, "memory": 36824, "step": 8751}
{"lr": 0.0011327889575844844, "data_time": 0.20547013282775878, "grad_norm": 0.251007454097271, "loss": 0.5185748398303985, "time": 1.8831945419311524, "epoch": 29, "memory": 36824, "step": 8864}
{"lr": 0.0011455682530553688, "data_time": 0.0021758794784545897, "grad_norm": 0.23302307724952698, "loss": 0.5168099582195282, "time": 1.9410382986068726, "epoch": 29, "memory": 36824, "step": 8964}
{"lr": 0.001158347548526254, "data_time": 0.0035265684127807617, "grad_norm": 0.24688583314418794, "loss": 0.5167815327644348, "time": 1.8953089237213134, "epoch": 29, "memory": 36824, "step": 9064}
{"lr": 0.0011727881524083558, "data_time": 0.015937185287475585, "grad_norm": 0.20718517303466796, "loss": 0.5180119097232818, "time": 1.8544183731079102, "epoch": 30, "memory": 36824, "step": 9177}
{"lr": 0.0011855674478792413, "data_time": 0.0019873619079589845, "grad_norm": 0.22744897603988648, "loss": 0.5242837965488434, "time": 1.942936944961548, "epoch": 30, "memory": 36824, "step": 9277}
{"lr": 0.0011983467433501272, "data_time": 0.003018951416015625, "grad_norm": 0.2712869092822075, "loss": 0.5242211103439331, "time": 1.8825279474258423, "epoch": 30, "memory": 36824, "step": 9377}
{"lr": 0.0012127873472322277, "data_time": 0.24677157402038574, "grad_norm": 0.23618974536657333, "loss": 0.5212547242641449, "time": 1.9014246702194213, "epoch": 31, "memory": 36824, "step": 9490}
{"lr": 0.0012255666427031123, "data_time": 0.0022382259368896483, "grad_norm": 0.199832084774971, "loss": 0.5180264770984649, "time": 1.9021787643432617, "epoch": 31, "memory": 36824, "step": 9590}
{"lr": 0.001238345938173996, "data_time": 0.0030870914459228517, "grad_norm": 0.1996348589658737, "loss": 0.5174115121364593, "time": 1.8719886302948, "epoch": 31, "memory": 36824, "step": 9690}
{"lr": 0.0012527865420560965, "data_time": 0.10227155685424805, "grad_norm": 0.23427814841270447, "loss": 0.5214695572853089, "time": 1.9242083549499511, "epoch": 32, "memory": 36824, "step": 9803}
{"lr": 0.0012655658375269794, "data_time": 0.0021378278732299806, "grad_norm": 0.21520504653453826, "loss": 0.5232331216335296, "time": 1.8984554529190063, "epoch": 32, "memory": 36824, "step": 9903}
{"lr": 0.001278345132997864, "data_time": 0.0025705337524414063, "grad_norm": 0.2326987236738205, "loss": 0.515133798122406, "time": 1.9643757104873658, "epoch": 32, "memory": 36824, "step": 10003}
{"lr": 0.0012927857368799647, "data_time": 0.16193573474884032, "grad_norm": 0.23194969743490218, "loss": 0.5227829277515411, "time": 1.9102398872375488, "epoch": 33, "memory": 36824, "step": 10116}
{"lr": 0.0013055650323508495, "data_time": 0.002177906036376953, "grad_norm": 0.24158522188663484, "loss": 0.5162052392959595, "time": 1.8792941331863404, "epoch": 33, "memory": 36824, "step": 10216}
{"lr": 0.0013183443278217345, "data_time": 0.003053617477416992, "grad_norm": 0.23435904383659362, "loss": 0.5182007491588593, "time": 1.892110013961792, "epoch": 33, "memory": 36824, "step": 10316}
{"lr": 0.001332784931703834, "data_time": 0.328900671005249, "grad_norm": 0.22174448817968367, "loss": 0.5218230426311493, "time": 1.854500913619995, "epoch": 34, "memory": 36824, "step": 10429}
{"lr": 0.0013455642271747205, "data_time": 0.0026891469955444337, "grad_norm": 0.21659281402826308, "loss": 0.5202609837055207, "time": 1.8779369115829467, "epoch": 34, "memory": 36824, "step": 10529}
{"lr": 0.001358343522645607, "data_time": 0.004288363456726074, "grad_norm": 0.201743982732296, "loss": 0.5178546488285065, "time": 1.9076083183288575, "epoch": 34, "memory": 36824, "step": 10629}
{"lr": 0.0013727841265277067, "data_time": 0.47898006439208984, "grad_norm": 0.19962656497955322, "loss": 0.5177307426929474, "time": 1.8976992607116698, "epoch": 35, "memory": 36824, "step": 10742}
{"lr": 0.0013855634219985923, "data_time": 0.020000982284545898, "grad_norm": 0.231020487844944, "loss": 0.5175319254398346, "time": 2.185322332382202, "epoch": 35, "memory": 36824, "step": 10842}
{"lr": 0.0013983427174694778, "data_time": 0.002386927604675293, "grad_norm": 0.20502385795116423, "loss": 0.5152030289173126, "time": 1.9054975032806396, "epoch": 35, "memory": 36824, "step": 10942}
{"lr": 0.001412783321351578, "data_time": 0.04299366474151611, "grad_norm": 0.24043065756559373, "loss": 0.5179327249526977, "time": 1.8974952936172484, "epoch": 36, "memory": 36824, "step": 11055}
{"lr": 0.0014255626168224633, "data_time": 0.002834510803222656, "grad_norm": 0.20410854369401932, "loss": 0.514073121547699, "time": 1.9777389526367188, "epoch": 36, "memory": 36824, "step": 11155}
{"lr": 0.00143834191229335, "data_time": 0.0029390573501586912, "grad_norm": 0.21742521673440934, "loss": 0.5121390432119369, "time": 1.948423194885254, "epoch": 36, "memory": 36824, "step": 11255}
{"lr": 0.0014527825161754508, "data_time": 0.6777551651000977, "grad_norm": 0.2105641171336174, "loss": 0.5131726801395416, "time": 2.1131593704223635, "epoch": 37, "memory": 36824, "step": 11368}
{"lr": 0.0014655618116463374, "data_time": 0.2963194608688354, "grad_norm": 0.23949287384748458, "loss": 0.5135184943675994, "time": 1.9579298496246338, "epoch": 37, "memory": 36824, "step": 11468}
{"lr": 0.0014783411071172235, "data_time": 0.5525972843170166, "grad_norm": 0.2342754751443863, "loss": 0.5115727424621582, "time": 2.193642282485962, "epoch": 37, "memory": 36824, "step": 11568}
{"lr": 0.0014927817109993242, "data_time": 0.524295973777771, "grad_norm": 0.21926813423633576, "loss": 0.5141592383384704, "time": 2.2710148334503173, "epoch": 38, "memory": 36824, "step": 11681}
{"lr": 0.0015055610064702107, "data_time": 0.0023337841033935548, "grad_norm": 0.19883454442024232, "loss": 0.5115957438945771, "time": 1.9470129013061523, "epoch": 38, "memory": 36824, "step": 11781}
{"lr": 0.0015183403019410954, "data_time": 0.002568697929382324, "grad_norm": 0.2007873311638832, "loss": 0.5122048199176789, "time": 1.8781697034835816, "epoch": 38, "memory": 36824, "step": 11881}
{"lr": 0.0015327809058231982, "data_time": 0.48240933418273924, "grad_norm": 0.19147169291973115, "loss": 0.5113106489181518, "time": 1.8913833379745484, "epoch": 39, "memory": 36824, "step": 11994}
{"lr": 0.0015455602012940843, "data_time": 0.009712409973144532, "grad_norm": 0.20422742664813995, "loss": 0.51329784989357, "time": 1.9309594631195068, "epoch": 39, "memory": 36824, "step": 12094}
{"lr": 0.0015583394967649703, "data_time": 0.0023655414581298826, "grad_norm": 0.19918537586927415, "loss": 0.5115286827087402, "time": 1.8916216850280763, "epoch": 39, "memory": 36824, "step": 12194}
{"lr": 0.0015727801006470746, "data_time": 0.3074202060699463, "grad_norm": 0.21145551949739455, "loss": 0.5086663961410522, "time": 1.8956143140792847, "epoch": 40, "memory": 36824, "step": 12307}
{"lr": 0.001585559396117961, "data_time": 0.002114415168762207, "grad_norm": 0.20041704922914505, "loss": 0.5091974437236786, "time": 1.9540279388427735, "epoch": 40, "memory": 36824, "step": 12407}
{"lr": 0.001598338691588847, "data_time": 0.003179335594177246, "grad_norm": 0.22324250787496566, "loss": 0.5129700124263763, "time": 1.9610103130340577, "epoch": 40, "memory": 36824, "step": 12507}
{"lr": 0.0015999993162235102, "data_time": 0.002265644073486328, "grad_norm": 0.19340551048517227, "loss": 0.507517746090889, "time": 1.9334410667419433, "epoch": 41, "memory": 36824, "step": 12620}
{"lr": 0.0015999972371979465, "data_time": 0.002535295486450195, "grad_norm": 0.20063892453908921, "loss": 0.5106914728879929, "time": 1.9152180433273316, "epoch": 41, "memory": 36824, "step": 12720}
{"lr": 0.0015999937628572323, "data_time": 0.0033055543899536133, "grad_norm": 0.1985551103949547, "loss": 0.511527681350708, "time": 1.8679906606674195, "epoch": 41, "memory": 36824, "step": 12820}
{"lr": 0.001599988157667916, "data_time": 0.2257859230041504, "grad_norm": 0.18346059173345566, "loss": 0.5075565040111542, "time": 1.9511946439743042, "epoch": 42, "memory": 36824, "step": 12933}
{"lr": 0.0015999817113291892, "data_time": 0.0024827957153320313, "grad_norm": 0.2032284140586853, "loss": 0.5082461446523666, "time": 1.9779958963394164, "epoch": 42, "memory": 36824, "step": 13033}
{"lr": 0.0015999738697023904, "data_time": 0.002539181709289551, "grad_norm": 0.19072289317846297, "loss": 0.5083194822072983, "time": 1.8828806638717652, "epoch": 42, "memory": 36824, "step": 13133}
{"lr": 0.0015999633295219508, "data_time": 0.2719794034957886, "grad_norm": 0.19185469299554825, "loss": 0.5063395023345947, "time": 1.9209911108016968, "epoch": 43, "memory": 36824, "step": 13246}
{"lr": 0.0015999525159802086, "data_time": 0.0023828983306884766, "grad_norm": 0.21185406148433686, "loss": 0.5056363880634308, "time": 1.9485583543777465, "epoch": 43, "memory": 36824, "step": 13346}
{"lr": 0.001599940307201318, "data_time": 0.0033554792404174804, "grad_norm": 0.2078690156340599, "loss": 0.5100001156330108, "time": 1.8484971046447753, "epoch": 43, "memory": 36824, "step": 13446}
{"lr": 0.0015999248322098594, "data_time": 0.08510267734527588, "grad_norm": 0.1893762618303299, "loss": 0.506598499417305, "time": 1.9180966138839721, "epoch": 44, "memory": 36824, "step": 13559}
{"lr": 0.0015999096516498776, "data_time": 0.002937173843383789, "grad_norm": 0.20354648679494858, "loss": 0.5063922315835953, "time": 1.8728575229644775, "epoch": 44, "memory": 36824, "step": 13659}
{"lr": 0.0015998930759275076, "data_time": 0.002449965476989746, "grad_norm": 0.21123158037662507, "loss": 0.5084692656993866, "time": 1.8884799003601074, "epoch": 44, "memory": 36824, "step": 13759}
{"lr": 0.0015998726663894543, "data_time": 0.132383394241333, "grad_norm": 0.19126748889684678, "loss": 0.5115516155958175, "time": 1.909196400642395, "epoch": 45, "memory": 36824, "step": 13872}
{"lr": 0.001599853119070625, "data_time": 0.0023223876953125, "grad_norm": 0.1865355685353279, "loss": 0.5150308668613434, "time": 1.9421117782592774, "epoch": 45, "memory": 36824, "step": 13972}
{"lr": 0.0015998321766880097, "data_time": 0.002741503715515137, "grad_norm": 0.18784242272377014, "loss": 0.5048240840435028, "time": 1.9614238500595094, "epoch": 45, "memory": 36824, "step": 14072}
{"lr": 0.0015998068329521034, "data_time": 0.16086058616638182, "grad_norm": 0.17159706354141235, "loss": 0.5061012715101242, "time": 1.8639030933380127, "epoch": 46, "memory": 36824, "step": 14185}
{"lr": 0.001599782919208438, "data_time": 0.0021657943725585938, "grad_norm": 0.19832735061645507, "loss": 0.5080407530069351, "time": 1.8547291040420533, "epoch": 46, "memory": 36824, "step": 14285}
{"lr": 0.001599757610523425, "data_time": 0.003189373016357422, "grad_norm": 0.16680329144001008, "loss": 0.5035649478435517, "time": 1.8874170303344726, "epoch": 46, "memory": 36824, "step": 14385}
{"lr": 0.0015997273330227186, "data_time": 0.19975335597991944, "grad_norm": 0.18422872424125672, "loss": 0.5055044233798981, "time": 1.9042391061782837, "epoch": 47, "memory": 36824, "step": 14498}
{"lr": 0.0015996990532628345, "data_time": 0.13840100765228272, "grad_norm": 0.15481470376253129, "loss": 0.5098330736160278, "time": 2.297292184829712, "epoch": 47, "memory": 36824, "step": 14598}
{"lr": 0.0015996693787078797, "data_time": 0.003912687301635742, "grad_norm": 0.1761915370821953, "loss": 0.5070678383111954, "time": 1.947789478302002, "epoch": 47, "memory": 36824, "step": 14698}
{"lr": 0.0015996341679597342, "data_time": 0.2520557403564453, "grad_norm": 0.18497462719678878, "loss": 0.5061874210834503, "time": 1.8908863067626953, "epoch": 48, "memory": 36824, "step": 14811}
{"lr": 0.001599601522666859, "data_time": 0.002285146713256836, "grad_norm": 0.19319151043891908, "loss": 0.5055076897144317, "time": 1.8680158138275147, "epoch": 48, "memory": 36824, "step": 14911}
{"lr": 0.0015995674827490168, "data_time": 0.0024488687515258787, "grad_norm": 0.16804752573370935, "loss": 0.5098436117172241, "time": 1.7858685493469237, "epoch": 48, "memory": 36824, "step": 15011}
{"lr": 0.001599527339355088, "data_time": 0.3020961761474609, "grad_norm": 0.17258752137422562, "loss": 0.4998659133911133, "time": 1.9145158529281616, "epoch": 49, "memory": 36824, "step": 15124}
{"lr": 0.0015994903290870357, "data_time": 0.0025113105773925783, "grad_norm": 0.17574655562639235, "loss": 0.5052484393119812, "time": 1.9405503749847413, "epoch": 49, "memory": 36824, "step": 15224}
{"lr": 0.0015994519243879566, "data_time": 0.0030603647232055665, "grad_norm": 0.18678433150053025, "loss": 0.504778403043747, "time": 1.8906092643737793, "epoch": 49, "memory": 36824, "step": 15324}
{"lr": 0.0015994068490341818, "data_time": 0.23291194438934326, "grad_norm": 0.18030133694410325, "loss": 0.5043003648519516, "time": 1.9321042776107789, "epoch": 50, "memory": 36824, "step": 15437}
{"lr": 0.0015993654744233556, "data_time": 0.001998114585876465, "grad_norm": 0.16232144832611084, "loss": 0.5043964087963104, "time": 1.9041910648345948, "epoch": 50, "memory": 36824, "step": 15537}
{"lr": 0.00159932270559927, "data_time": 0.0031441450119018555, "grad_norm": 0.17232210785150529, "loss": 0.502691388130188, "time": 1.9151172637939453, "epoch": 50, "memory": 36824, "step": 15637}
{"lr": 0.0015992726990558643, "data_time": 0.26085472106933594, "grad_norm": 0.17066794633865356, "loss": 0.5031828671693802, "time": 2.002449560165405, "epoch": 51, "memory": 36824, "step": 15750}
{"lr": 0.0015992269608092438, "data_time": 0.002188968658447266, "grad_norm": 0.16913493275642394, "loss": 0.5067823082208633, "time": 1.968889021873474, "epoch": 51, "memory": 36824, "step": 15850}
{"lr": 0.0015991798285909522, "data_time": 0.0025858640670776366, "grad_norm": 0.1632404312491417, "loss": 0.5021112114191055, "time": 1.8744352817535401, "epoch": 51, "memory": 36824, "step": 15950}
{"lr": 0.0015991248917123892, "data_time": 0.19662051200866698, "grad_norm": 0.17934757769107817, "loss": 0.5047576010227204, "time": 1.948106288909912, "epoch": 52, "memory": 36824, "step": 16063}
{"lr": 0.001599074790611515, "data_time": 0.0021884918212890627, "grad_norm": 0.18010894507169722, "loss": 0.5015205711126327, "time": 1.9373501062393188, "epoch": 52, "memory": 36824, "step": 16163}
{"lr": 0.0015990232958043762, "data_time": 0.002607846260070801, "grad_norm": 0.17494061589241028, "loss": 0.5020985543727875, "time": 1.9076655626296997, "epoch": 52, "memory": 36824, "step": 16263}
{"lr": 0.0015989634295293755, "data_time": 0.6697917938232422, "grad_norm": 0.1675620421767235, "loss": 0.5057895749807357, "time": 2.225760555267334, "epoch": 53, "memory": 36824, "step": 16376}
{"lr": 0.0015989089664303375, "data_time": 0.0024816513061523436, "grad_norm": 0.18829745054244995, "loss": 0.5014190256595612, "time": 1.9305165529251098, "epoch": 53, "memory": 36824, "step": 16476}
{"lr": 0.0015988531099142562, "data_time": 0.002496337890625, "grad_norm": 0.1798308476805687, "loss": 0.5002503126859665, "time": 1.7547128677368165, "epoch": 53, "memory": 36824, "step": 16576}
{"lr": 0.0015987883152657663, "data_time": 0.0023943901062011717, "grad_norm": 0.16165224611759185, "loss": 0.503009232878685, "time": 2.0534798860549928, "epoch": 54, "memory": 36824, "step": 16689}
{"lr": 0.0015987294910991875, "data_time": 0.0022185325622558595, "grad_norm": 0.17213917672634124, "loss": 0.5033684730529785, "time": 1.8788278579711915, "epoch": 54, "memory": 36824, "step": 16789}
{"lr": 0.0015986692738285968, "data_time": 0.003226494789123535, "grad_norm": 0.17403486371040344, "loss": 0.49773370921611787, "time": 1.8580562829971314, "epoch": 54, "memory": 36824, "step": 16889}
{"lr": 0.0015985995519137812, "data_time": 0.33960695266723634, "grad_norm": 0.1698737770318985, "loss": 0.5047611057758331, "time": 1.945894432067871, "epoch": 55, "memory": 36824, "step": 17002}
{"lr": 0.0015985363676848045, "data_time": 0.002144312858581543, "grad_norm": 0.15200753509998322, "loss": 0.501143628358841, "time": 1.8497392892837525, "epoch": 55, "memory": 36824, "step": 17102}
{"lr": 0.0015984717906886525, "data_time": 0.00254673957824707, "grad_norm": 0.1640369713306427, "loss": 0.5019455254077911, "time": 1.8675736904144287, "epoch": 55, "memory": 36824, "step": 17202}
{"lr": 0.0015983971426988641, "data_time": 0.15832288265228273, "grad_norm": 0.17208249866962433, "loss": 0.5023378938436508, "time": 1.908101773262024, "epoch": 56, "memory": 36824, "step": 17315}
{"lr": 0.0015983295994871321, "data_time": 0.0024952411651611326, "grad_norm": 0.17628853917121887, "loss": 0.49923659265041354, "time": 1.9006362199783324, "epoch": 56, "memory": 36824, "step": 17415}
{"lr": 0.0015982606638688611, "data_time": 0.0030425071716308595, "grad_norm": 0.19415006637573243, "loss": 0.5056566029787064, "time": 1.9715508937835693, "epoch": 56, "memory": 36824, "step": 17515}
{"lr": 0.0015981810910796303, "data_time": 0.34750473499298096, "grad_norm": 0.16922648698091508, "loss": 0.4986822813749313, "time": 1.885604190826416, "epoch": 57, "memory": 36824, "step": 17628}
{"lr": 0.0015981091900392726, "data_time": 0.11570429801940918, "grad_norm": 0.1796618029475212, "loss": 0.4964832603931427, "time": 1.8900628328323363, "epoch": 57, "memory": 36824, "step": 17728}
{"lr": 0.0015980358969768025, "data_time": 0.002623152732849121, "grad_norm": 0.16158892959356308, "loss": 0.5045110821723938, "time": 1.8919615507125855, "epoch": 57, "memory": 36824, "step": 17828}
{"lr": 0.001597951400747807, "data_time": 0.09412906169891358, "grad_norm": 0.1645933695137501, "loss": 0.500471505522728, "time": 1.8901229619979858, "epoch": 58, "memory": 36824, "step": 17941}
{"lr": 0.0015978751431074145, "data_time": 0.03062572479248047, "grad_norm": 0.18192660957574844, "loss": 0.49879494309425354, "time": 2.063242769241333, "epoch": 58, "memory": 36824, "step": 18041}
{"lr": 0.0015977974938531216, "data_time": 0.0030585765838623048, "grad_norm": 0.1774825170636177, "loss": 0.5027704119682312, "time": 2.1062671422958372, "epoch": 58, "memory": 36824, "step": 18141}
{"lr": 0.00159770807562817, "data_time": 0.0021922111511230467, "grad_norm": 0.16701520308852197, "loss": 0.5023482084274292, "time": 2.005924844741821, "epoch": 59, "memory": 36824, "step": 18254}
{"lr": 0.0015976274626907732, "data_time": 0.002217221260070801, "grad_norm": 0.15946613103151322, "loss": 0.5005955129861832, "time": 1.8542798042297364, "epoch": 59, "memory": 36824, "step": 18354}
{"lr": 0.001597545458571469, "data_time": 0.003045320510864258, "grad_norm": 0.17826333343982698, "loss": 0.5001899629831315, "time": 1.8911072492599488, "epoch": 59, "memory": 36824, "step": 18454}
{"lr": 0.0015974511198784702, "data_time": 0.0021586179733276366, "grad_norm": 0.1611877992749214, "loss": 0.4962626785039902, "time": 1.8799171686172484, "epoch": 60, "memory": 36824, "step": 18567}
{"lr": 0.0015973661530215223, "data_time": 0.0022942304611206056, "grad_norm": 0.17285136580467225, "loss": 0.49992031753063204, "time": 1.9277156352996827, "epoch": 60, "memory": 36824, "step": 18667}
{"lr": 0.0015972797954384302, "data_time": 0.0028430700302124025, "grad_norm": 0.17362937778234483, "loss": 0.5029088467359543, "time": 1.8205391883850097, "epoch": 60, "memory": 36824, "step": 18767}
{"lr": 0.001597180537889375, "data_time": 0.37113425731658933, "grad_norm": 0.16265316903591157, "loss": 0.49829441010951997, "time": 1.8475321531295776, "epoch": 61, "memory": 36824, "step": 18880}
{"lr": 0.0015970912185647248, "data_time": 0.002232217788696289, "grad_norm": 0.15447299405932427, "loss": 0.5052911877632141, "time": 1.9142731189727784, "epoch": 61, "memory": 36824, "step": 18980}
{"lr": 0.0015970005089934567, "data_time": 0.0026275157928466798, "grad_norm": 0.19389368444681168, "loss": 0.5013639032840729, "time": 1.9778602838516235, "epoch": 61, "memory": 36824, "step": 19080}
{"lr": 0.0015968963342843808, "data_time": 0.23035056591033937, "grad_norm": 0.16270118802785874, "loss": 0.5007614374160767, "time": 1.8795423984527588, "epoch": 62, "memory": 36824, "step": 19193}
{"lr": 0.0015968026640182501, "data_time": 0.002313971519470215, "grad_norm": 0.15815811306238176, "loss": 0.50072141289711, "time": 1.8758023500442504, "epoch": 62, "memory": 36824, "step": 19293}
{"lr": 0.0015967076040087837, "data_time": 0.0031747817993164062, "grad_norm": 0.17869056314229964, "loss": 0.5015316665172577, "time": 1.9340933084487915, "epoch": 62, "memory": 36824, "step": 19393}
{"lr": 0.0015965985139197452, "data_time": 0.12887203693389893, "grad_norm": 0.15940226018428802, "loss": 0.5012534707784653, "time": 1.8505306959152221, "epoch": 63, "memory": 36824, "step": 19506}
{"lr": 0.0015965004943126995, "data_time": 0.0021125316619873048, "grad_norm": 0.14508493095636368, "loss": 0.4980751007795334, "time": 1.907819437980652, "epoch": 63, "memory": 36824, "step": 19606}
{"lr": 0.0015964010854893488, "data_time": 0.0025706768035888674, "grad_norm": 0.1637595236301422, "loss": 0.49914342164993286, "time": 1.8912510871887207, "epoch": 63, "memory": 36824, "step": 19706}
{"lr": 0.0015962870818843975, "data_time": 0.23969833850860595, "grad_norm": 0.16597641855478287, "loss": 0.4993285953998566, "time": 1.9147238731384277, "epoch": 64, "memory": 36824, "step": 19819}
{"lr": 0.0015961847146113229, "data_time": 0.0020839929580688476, "grad_norm": 0.15628280118107796, "loss": 0.4966996133327484, "time": 2.1893155336380006, "epoch": 64, "memory": 36824, "step": 19919}
{"lr": 0.0015960809586727102, "data_time": 0.0026971101760864258, "grad_norm": 0.17010149955749512, "loss": 0.49865732789039613, "time": 1.8519896268844604, "epoch": 64, "memory": 36824, "step": 20019}
{"lr": 0.0015959620434998526, "data_time": 0.08209366798400879, "grad_norm": 0.16033223643898964, "loss": 0.49749027788639066, "time": 1.934156608581543, "epoch": 65, "memory": 36824, "step": 20132}
{"lr": 0.0015958553303099215, "data_time": 0.002220892906188965, "grad_norm": 0.15869409665465356, "loss": 0.4976803123950958, "time": 1.9541440725326538, "epoch": 65, "memory": 36824, "step": 20232}
{"lr": 0.0015957472290289488, "data_time": 0.002617216110229492, "grad_norm": 0.16656795591115953, "loss": 0.4973181188106537, "time": 1.917637872695923, "epoch": 65, "memory": 36824, "step": 20332}
{"lr": 0.0015956234043201178, "data_time": 0.012223625183105468, "grad_norm": 0.1612636223435402, "loss": 0.49634858667850495, "time": 1.8827759742736816, "epoch": 66, "memory": 36824, "step": 20445}
{"lr": 0.0015955123470367653, "data_time": 0.0021933317184448242, "grad_norm": 0.1747029110789299, "loss": 0.4958700090646744, "time": 1.982958436012268, "epoch": 66, "memory": 36824, "step": 20545}
{"lr": 0.0015953999022605837, "data_time": 0.0028417825698852537, "grad_norm": 0.15770643278956414, "loss": 0.4950293779373169, "time": 1.8015393972396851, "epoch": 66, "memory": 36824, "step": 20645}
{"lr": 0.0015952711701316014, "data_time": 0.18152050971984862, "grad_norm": 0.15311774462461472, "loss": 0.49982097148895266, "time": 1.8691584825515748, "epoch": 67, "memory": 36824, "step": 20758}
{"lr": 0.0015951557706524902, "data_time": 0.00236356258392334, "grad_norm": 0.15624743998050689, "loss": 0.5000448077917099, "time": 2.0097821235656737, "epoch": 67, "memory": 36824, "step": 20858}
{"lr": 0.001595038984302472, "data_time": 0.002549004554748535, "grad_norm": 0.15756705701351165, "loss": 0.49964770674705505, "time": 1.9633522033691406, "epoch": 67, "memory": 36824, "step": 20958}
{"lr": 0.0015949053469530151, "data_time": 0.12289144992828369, "grad_norm": 0.14888546615839005, "loss": 0.5002924740314484, "time": 1.875451636314392, "epoch": 68, "memory": 36824, "step": 21071}
{"lr": 0.0015947856072500057, "data_time": 0.0021778345108032227, "grad_norm": 0.168968166410923, "loss": 0.5003884732723236, "time": 1.9127749919891357, "epoch": 68, "memory": 36824, "step": 21171}
{"lr": 0.001594664481321708, "data_time": 0.002483773231506348, "grad_norm": 0.16224458813667297, "loss": 0.5006517857313156, "time": 1.888793706893921, "epoch": 68, "memory": 36824, "step": 21271}
{"lr": 0.0015945259410352717, "data_time": 0.0025855064392089843, "grad_norm": 0.15316785573959352, "loss": 0.4950763016939163, "time": 1.8336049795150757, "epoch": 69, "memory": 36824, "step": 21384}
{"lr": 0.0015944018631543818, "data_time": 0.002349996566772461, "grad_norm": 0.18214132636785507, "loss": 0.49927590489387513, "time": 1.9360841751098632, "epoch": 69, "memory": 36824, "step": 21484}
{"lr": 0.0015942763997175125, "data_time": 0.0025316476821899414, "grad_norm": 0.1576600342988968, "loss": 0.49676951467990876, "time": 1.8711662530899047, "epoch": 69, "memory": 36824, "step": 21584}
{"lr": 0.0015941329588613658, "data_time": 0.0023781538009643556, "grad_norm": 0.15293204486370088, "loss": 0.501326397061348, "time": 2.288693380355835, "epoch": 70, "memory": 36824, "step": 21697}
{"lr": 0.0015940045449227465, "data_time": 0.002094459533691406, "grad_norm": 0.16193269714713096, "loss": 0.4961808264255524, "time": 1.8567462682723999, "epoch": 70, "memory": 36824, "step": 21797}
{"lr": 0.0015938747461211296, "data_time": 0.0032755136489868164, "grad_norm": 0.15301893651485443, "loss": 0.4962302088737488, "time": 2.054647350311279, "epoch": 70, "memory": 36824, "step": 21897}
{"lr": 0.0015937264071462821, "data_time": 0.19327025413513182, "grad_norm": 0.14074397832155228, "loss": 0.49800051748752594, "time": 1.8865682125091552, "epoch": 71, "memory": 36824, "step": 22010}
{"lr": 0.0015935936593441716, "data_time": 0.0023389577865600584, "grad_norm": 0.16556250154972077, "loss": 0.497174933552742, "time": 1.8879141330718994, "epoch": 71, "memory": 36824, "step": 22110}
{"lr": 0.0015934595273957106, "data_time": 0.003184080123901367, "grad_norm": 0.16041972488164902, "loss": 0.4970362186431885, "time": 1.916041088104248, "epoch": 71, "memory": 36824, "step": 22210}
{"lr": 0.0015933062928368648, "data_time": 0.24664881229400634, "grad_norm": 0.15297755599021912, "loss": 0.49944988191127776, "time": 1.92495539188385, "epoch": 72, "memory": 36824, "step": 22323}
{"lr": 0.0015931692134395568, "data_time": 0.0025780916213989256, "grad_norm": 0.14851802736520767, "loss": 0.4944258689880371, "time": 1.89563148021698, "epoch": 72, "memory": 36824, "step": 22423}
{"lr": 0.0015930307506361963, "data_time": 0.0033803224563598634, "grad_norm": 0.14963361620903015, "loss": 0.49438579976558683, "time": 1.8559472799301147, "epoch": 72, "memory": 36824, "step": 22523}
{"lr": 0.0015928726231117082, "data_time": 0.20469112396240235, "grad_norm": 0.16594320982694627, "loss": 0.4947543919086456, "time": 1.9756564378738404, "epoch": 73, "memory": 36824, "step": 22636}
{"lr": 0.0015927312144615108, "data_time": 0.0022099494934082033, "grad_norm": 0.1634588919579983, "loss": 0.4962147682905197, "time": 1.860531497001648, "epoch": 73, "memory": 36824, "step": 22736}
{"lr": 0.001592588423169198, "data_time": 0.003305530548095703, "grad_norm": 0.14331649988889694, "loss": 0.49888773262500763, "time": 1.8520727634429932, "epoch": 73, "memory": 36824, "step": 22836}
{"lr": 0.0015924254053810332, "data_time": 0.48051156997680666, "grad_norm": 0.130735445022583, "loss": 0.5008675545454025, "time": 1.8781434535980224, "epoch": 74, "memory": 36824, "step": 22949}
{"lr": 0.0015922796698942308, "data_time": 0.002329397201538086, "grad_norm": 0.1679140493273735, "loss": 0.4953462153673172, "time": 1.9186381578445435, "epoch": 74, "memory": 36824, "step": 23049}
{"lr": 0.001592132552552874, "data_time": 0.0026006221771240233, "grad_norm": 0.16060732156038285, "loss": 0.5018518835306167, "time": 1.8827243089675902, "epoch": 74, "memory": 36824, "step": 23149}
{"lr": 0.0015919646472865544, "data_time": 0.3799311399459839, "grad_norm": 0.19528377503156663, "loss": 0.49708354771137236, "time": 1.8528534173965454, "epoch": 75, "memory": 36824, "step": 23262}
{"lr": 0.0015918145874533659, "data_time": 0.12347936630249023, "grad_norm": 0.14551581367850303, "loss": 0.4966806322336197, "time": 1.867007851600647, "epoch": 75, "memory": 36824, "step": 23362}
{"lr": 0.0015916631465767957, "data_time": 0.0031633377075195312, "grad_norm": 0.169583235681057, "loss": 0.4976761430501938, "time": 2.1736653089523315, "epoch": 75, "memory": 36824, "step": 23462}
{"lr": 0.0015914903567013606, "data_time": 0.08091063499450683, "grad_norm": 0.15439294129610062, "loss": 0.500319042801857, "time": 1.8631286144256591, "epoch": 76, "memory": 36824, "step": 23575}
{"lr": 0.0015913359750858937, "data_time": 0.0022769451141357424, "grad_norm": 0.16155520156025888, "loss": 0.5008346676826477, "time": 1.8440065383911133, "epoch": 76, "memory": 36824, "step": 23675}
{"lr": 0.0015911802132618157, "data_time": 0.0036096572875976562, "grad_norm": 0.17164436727762222, "loss": 0.4952242821455002, "time": 1.8623979091644287, "epoch": 76, "memory": 36824, "step": 23775}
{"lr": 0.0015910025417297674, "data_time": 0.03405768871307373, "grad_norm": 0.14069576561450958, "loss": 0.4970181524753571, "time": 2.014390301704407, "epoch": 77, "memory": 36824, "step": 23888}
{"lr": 0.0015908438409699772, "data_time": 0.002875089645385742, "grad_norm": 0.1646365039050579, "loss": 0.4970458924770355, "time": 1.8498972177505493, "epoch": 77, "memory": 36824, "step": 23988}
{"lr": 0.001590683760859933, "data_time": 0.002624058723449707, "grad_norm": 0.17602379620075226, "loss": 0.4951163738965988, "time": 1.9326467752456664, "epoch": 77, "memory": 36824, "step": 24088}
{"lr": 0.0015905012107071844, "data_time": 0.2606935739517212, "grad_norm": 0.17163899093866347, "loss": 0.49792048037052156, "time": 1.9067004680633546, "epoch": 78, "memory": 36824, "step": 24201}
{"lr": 0.00159033819351483, "data_time": 0.002329897880554199, "grad_norm": 0.14161070436239243, "loss": 0.4967062383890152, "time": 1.8743001461029052, "epoch": 78, "memory": 36824, "step": 24301}
{"lr": 0.0015901737978541444, "data_time": 0.002601957321166992, "grad_norm": 0.17075709551572799, "loss": 0.49381603598594664, "time": 1.8357742786407472, "epoch": 78, "memory": 36824, "step": 24401}
{"lr": 0.001589986372199978, "data_time": 0.22046308517456054, "grad_norm": 0.16513848900794983, "loss": 0.4961204528808594, "time": 1.835726809501648, "epoch": 79, "memory": 36824, "step": 24514}
{"lr": 0.0015898190413605724, "data_time": 0.0022958040237426756, "grad_norm": 0.16390977352857589, "loss": 0.495221683382988, "time": 2.047697567939758, "epoch": 79, "memory": 36824, "step": 24614}
{"lr": 0.0015896503329583143, "data_time": 0.01376032829284668, "grad_norm": 0.16074201315641404, "loss": 0.4957681715488434, "time": 1.9370209932327271, "epoch": 79, "memory": 36824, "step": 24714}
{"lr": 0.0015894580350053165, "data_time": 0.30613934993743896, "grad_norm": 0.17463859021663666, "loss": 0.5002077132463455, "time": 1.9097514867782592, "epoch": 80, "memory": 36824, "step": 24827}
{"lr": 0.0015892863933780821, "data_time": 0.002865910530090332, "grad_norm": 0.1377936013042927, "loss": 0.4975591659545898, "time": 1.860866641998291, "epoch": 80, "memory": 36824, "step": 24927}
{"lr": 0.0015891133751170165, "data_time": 0.0025384902954101564, "grad_norm": 0.14619275480508803, "loss": 0.4957934617996216, "time": 1.8365981578826904, "epoch": 80, "memory": 36824, "step": 25027}
{"lr": 0.0015889162081510271, "data_time": 0.2767892360687256, "grad_norm": 0.15886927619576455, "loss": 0.49482182264328, "time": 1.8961994886398315, "epoch": 81, "memory": 36824, "step": 25140}
{"lr": 0.001588740258668845, "data_time": 0.002467918395996094, "grad_norm": 0.14764223769307136, "loss": 0.49413350224494934, "time": 1.8673210144042969, "epoch": 81, "memory": 36824, "step": 25240}
{"lr": 0.001588562933505373, "data_time": 0.003285121917724609, "grad_norm": 0.15049359053373337, "loss": 0.49711272716522215, "time": 1.856051754951477, "epoch": 81, "memory": 36824, "step": 25340}
{"lr": 0.0015883609008954334, "data_time": 0.16874544620513915, "grad_norm": 0.15721638798713683, "loss": 0.494277748465538, "time": 1.8597761154174806, "epoch": 82, "memory": 36824, "step": 25453}
{"lr": 0.0015881806465647967, "data_time": 0.002255105972290039, "grad_norm": 0.1397046074271202, "loss": 0.49707016050815583, "time": 1.8979951381683349, "epoch": 82, "memory": 36824, "step": 25553}
{"lr": 0.0015879990175289163, "data_time": 0.003206300735473633, "grad_norm": 0.14017621651291848, "loss": 0.4938764154911041, "time": 1.890860414505005, "epoch": 82, "memory": 36824, "step": 25653}
{"lr": 0.001587792122727204, "data_time": 0.44751811027526855, "grad_norm": 0.14575783759355546, "loss": 0.4991327464580536, "time": 1.8907009363174438, "epoch": 83, "memory": 36824, "step": 25766}
{"lr": 0.0015876075666281617, "data_time": 0.16630816459655762, "grad_norm": 0.1430206686258316, "loss": 0.4962897330522537, "time": 1.9371206045150757, "epoch": 83, "memory": 36824, "step": 25866}
{"lr": 0.0015874216368234125, "data_time": 0.003250789642333984, "grad_norm": 0.15213179141283034, "loss": 0.494027841091156, "time": 1.9147136926651, "epoch": 83, "memory": 36824, "step": 25966}
{"lr": 0.0015872098833651892, "data_time": 0.1332559585571289, "grad_norm": 0.16103745326399804, "loss": 0.4911139816045761, "time": 1.8524922370910644, "epoch": 84, "memory": 36824, "step": 26079}
{"lr": 0.001587021028651295, "data_time": 0.002423596382141113, "grad_norm": 0.15589655563235283, "loss": 0.49023948013782503, "time": 1.8983437061309814, "epoch": 84, "memory": 36824, "step": 26179}
{"lr": 0.0015868308012547054, "data_time": 0.0027997732162475587, "grad_norm": 0.16132925301790238, "loss": 0.4944963902235031, "time": 1.8735070705413819, "epoch": 84, "memory": 36824, "step": 26279}
{"lr": 0.0015866141927582532, "data_time": 0.6125184297561646, "grad_norm": 0.17133795619010925, "loss": 0.4974230617284775, "time": 2.0878718376159666, "epoch": 85, "memory": 36824, "step": 26392}
{"lr": 0.0015864210426565148, "data_time": 0.0023277759552001952, "grad_norm": 0.15821097046136856, "loss": 0.4936854213476181, "time": 1.8785223245620728, "epoch": 85, "memory": 36824, "step": 26492}
{"lr": 0.0015862265209185447, "data_time": 0.0024805307388305665, "grad_norm": 0.16614853143692015, "loss": 0.4905063331127167, "time": 1.865849256515503, "epoch": 85, "memory": 36824, "step": 26592}
{"lr": 0.0015860050610851063, "data_time": 0.0025700092315673827, "grad_norm": 0.17229360416531564, "loss": 0.49309514462947845, "time": 1.9418335914611817, "epoch": 86, "memory": 36824, "step": 26705}
{"lr": 0.0015858076188959252, "data_time": 0.0023065805435180664, "grad_norm": 0.18263541907072067, "loss": 0.49665592014789584, "time": 2.147817540168762, "epoch": 86, "memory": 36824, "step": 26805}
{"lr": 0.0015856088061404132, "data_time": 0.0034238100051879883, "grad_norm": 0.15663941726088523, "loss": 0.4967365026473999, "time": 1.989262294769287, "epoch": 86, "memory": 36824, "step": 26905}
{"lr": 0.0015853824987541283, "data_time": 0.0027007102966308595, "grad_norm": 0.16633783727884294, "loss": 0.495611634850502, "time": 1.9476771354675293, "epoch": 87, "memory": 36824, "step": 27018}
{"lr": 0.0015851807678512447, "data_time": 0.002177858352661133, "grad_norm": 0.15100570693612098, "loss": 0.4929488033056259, "time": 1.957259225845337, "epoch": 87, "memory": 36824, "step": 27118}
{"lr": 0.0015849776674753562, "data_time": 0.0025588512420654298, "grad_norm": 0.14544184729456902, "loss": 0.4941647291183472, "time": 1.8452971935272218, "epoch": 87, "memory": 36824, "step": 27218}
{"lr": 0.0015847465164031901, "data_time": 0.31578199863433837, "grad_norm": 0.137550700455904, "loss": 0.49207558333873747, "time": 1.8864487409591675, "epoch": 88, "memory": 36824, "step": 27331}
{"lr": 0.0015845405002336303, "data_time": 0.0022011756896972655, "grad_norm": 0.16556642949581146, "loss": 0.4975768983364105, "time": 1.9826364278793336, "epoch": 88, "memory": 36824, "step": 27431}
{"lr": 0.0015843331157077876, "data_time": 0.0026609420776367186, "grad_norm": 0.13094720542430877, "loss": 0.4930807828903198, "time": 1.8966126918792725, "epoch": 88, "memory": 36824, "step": 27531}
{"lr": 0.001584097124899476, "data_time": 0.011198687553405761, "grad_norm": 0.18085446953773499, "loss": 0.49382061660289767, "time": 1.8785374402999877, "epoch": 89, "memory": 36824, "step": 27644}
{"lr": 0.001583886826983486, "data_time": 0.0023068666458129885, "grad_norm": 0.15777995586395263, "loss": 0.4915014296770096, "time": 1.8860194206237793, "epoch": 89, "memory": 36824, "step": 27744}
{"lr": 0.001583675161851319, "data_time": 0.0025803327560424806, "grad_norm": 0.1315284937620163, "loss": 0.49770565927028654, "time": 1.8859882593154906, "epoch": 89, "memory": 36824, "step": 27844}
{"lr": 0.0015834343353392955, "data_time": 0.33597545623779296, "grad_norm": 0.16200171709060668, "loss": 0.496955481171608, "time": 1.9710357427597045, "epoch": 90, "memory": 36824, "step": 27957}
{"lr": 0.00158321975927029, "data_time": 0.0029061555862426756, "grad_norm": 0.14926654547452928, "loss": 0.4933037132024765, "time": 1.8667288780212403, "epoch": 90, "memory": 36824, "step": 28057}
{"lr": 0.001583003817148571, "data_time": 0.003081488609313965, "grad_norm": 0.14951784536242485, "loss": 0.49711131155490873, "time": 2.0676468133926393, "epoch": 90, "memory": 36824, "step": 28157}
{"lr": 0.0015827581590478923, "data_time": 0.012783336639404296, "grad_norm": 0.1438809685409069, "loss": 0.49244947731494904, "time": 1.8837517976760865, "epoch": 91, "memory": 36824, "step": 28270}
{"lr": 0.0015825393084923833, "data_time": 0.0022513151168823244, "grad_norm": 0.1420236572623253, "loss": 0.4985931158065796, "time": 1.8657740116119386, "epoch": 91, "memory": 36824, "step": 28370}
{"lr": 0.0015823190930709685, "data_time": 0.0033553600311279296, "grad_norm": 0.15878373757004738, "loss": 0.4934094876050949, "time": 1.8908115386962892, "epoch": 91, "memory": 36824, "step": 28470}
{"lr": 0.001582068607579257, "data_time": 0.2777329206466675, "grad_norm": 0.13963473662734033, "loss": 0.494118919968605, "time": 1.877320909500122, "epoch": 92, "memory": 36824, "step": 28583}
{"lr": 0.0015818454862767933, "data_time": 0.002313971519470215, "grad_norm": 0.1651138573884964, "loss": 0.5015892446041107, "time": 1.9052289485931397, "epoch": 92, "memory": 36824, "step": 28683}
{"lr": 0.0015816210013185558, "data_time": 0.002519369125366211, "grad_norm": 0.15061048939824104, "loss": 0.4967843860387802, "time": 1.8782068967819214, "epoch": 92, "memory": 36824, "step": 28783}
{"lr": 0.0015813656927159156, "data_time": 0.3023559093475342, "grad_norm": 0.14561443999409676, "loss": 0.4965152621269226, "time": 1.8157068490982056, "epoch": 93, "memory": 36824, "step": 28896}
{"lr": 0.0015811383044790287, "data_time": 0.002361750602722168, "grad_norm": 0.13676575124263762, "loss": 0.491452094912529, "time": 1.8533717393875122, "epoch": 93, "memory": 36824, "step": 28996}
{"lr": 0.0015809095538197983, "data_time": 0.002676868438720703, "grad_norm": 0.15944801345467569, "loss": 0.4925440281629562, "time": 1.858943247795105, "epoch": 93, "memory": 36824, "step": 29096}
{"lr": 0.0015806494264687478, "data_time": 0.002872061729431152, "grad_norm": 0.15736321806907655, "loss": 0.4921214312314987, "time": 1.8837630033493042, "epoch": 94, "memory": 36824, "step": 29209}
{"lr": 0.0015804177751828723, "data_time": 0.0027872323989868164, "grad_norm": 0.14905685633420945, "loss": 0.49401370584964754, "time": 1.8449570178985595, "epoch": 94, "memory": 36824, "step": 29309}
{"lr": 0.0015801847627313658, "data_time": 0.0031935930252075194, "grad_norm": 0.1431156113743782, "loss": 0.49315050840377805, "time": 1.8686672687530517, "epoch": 94, "memory": 36824, "step": 29409}
{"lr": 0.0015799198210767666, "data_time": 0.9707358837127685, "grad_norm": 0.1515482932329178, "loss": 0.4930494874715805, "time": 2.8838570356369018, "epoch": 95, "memory": 36824, "step": 29522}
{"lr": 0.0015796839107001862, "data_time": 0.22630069255828858, "grad_norm": 0.17655980288982392, "loss": 0.4955602765083313, "time": 2.8337144374847414, "epoch": 95, "memory": 36824, "step": 29622}
{"lr": 0.001579446640437943, "data_time": 0.0044411420822143555, "grad_norm": 0.17733051851391793, "loss": 0.4917856454849243, "time": 2.8222693681716917, "epoch": 95, "memory": 36824, "step": 29722}
{"lr": 0.0015791768890069148, "data_time": 1.149603271484375, "grad_norm": 0.16076652556657792, "loss": 0.4962903708219528, "time": 2.8175268173217773, "epoch": 96, "memory": 36824, "step": 29835}
{"lr": 0.0015789367235706862, "data_time": 0.5857483148574829, "grad_norm": 0.14855723530054094, "loss": 0.4893343299627304, "time": 2.8554691791534426, "epoch": 96, "memory": 36824, "step": 29935}
{"lr": 0.0015786951995520016, "data_time": 0.12791593074798585, "grad_norm": 0.1576172411441803, "loss": 0.495429190993309, "time": 2.917197823524475, "epoch": 96, "memory": 36824, "step": 30035}
{"lr": 0.0015784206429538537, "data_time": 1.1225972414016723, "grad_norm": 0.16451793611049653, "loss": 0.4890455901622772, "time": 2.7142708778381346, "epoch": 97, "memory": 36824, "step": 30148}
{"lr": 0.0015781762265617404, "data_time": 0.8165991067886352, "grad_norm": 0.14258479550480843, "loss": 0.4925374686717987, "time": 2.794007182121277, "epoch": 97, "memory": 36824, "step": 30248}
{"lr": 0.0015779304529135927, "data_time": 0.19576456546783447, "grad_norm": 0.15491778254508973, "loss": 0.491298645734787, "time": 2.861807441711426, "epoch": 97, "memory": 36824, "step": 30348}
{"lr": 0.0015776510958397391, "data_time": 0.35868325233459475, "grad_norm": 0.15073582604527475, "loss": 0.49080015122890475, "time": 2.1404525995254517, "epoch": 98, "memory": 36824, "step": 30461}
{"lr": 0.0015774024326681434, "data_time": 0.7912956714630127, "grad_norm": 0.13891723603010178, "loss": 0.49662026166915896, "time": 3.700297141075134, "epoch": 98, "memory": 36824, "step": 30561}
{"lr": 0.0015771524135901264, "data_time": 0.003170680999755859, "grad_norm": 0.16640393137931825, "loss": 0.49149452745914457, "time": 2.910555052757263, "epoch": 98, "memory": 36824, "step": 30661}
{"lr": 0.0015768682608140094, "data_time": 1.220927906036377, "grad_norm": 0.15339271426200868, "loss": 0.4939324349164963, "time": 2.8075807094573975, "epoch": 99, "memory": 36824, "step": 30774}
{"lr": 0.0015766153551119001, "data_time": 0.6514979362487793, "grad_norm": 0.1435060530900955, "loss": 0.4952695041894913, "time": 2.84133243560791, "epoch": 99, "memory": 36824, "step": 30874}
{"lr": 0.0015763610948761485, "data_time": 0.3874021053314209, "grad_norm": 0.1371751271188259, "loss": 0.4953141361474991, "time": 3.149503779411316, "epoch": 99, "memory": 36824, "step": 30974}
{"lr": 0.001576072151253153, "data_time": 1.0799083709716797, "grad_norm": 0.15614588633179666, "loss": 0.49226829707622527, "time": 2.7771803379058837, "epoch": 100, "memory": 36824, "step": 31087}
{"lr": 0.0015758150073419898, "data_time": 0.54190034866333, "grad_norm": 0.14076382517814637, "loss": 0.4893141448497772, "time": 2.9517688512802125, "epoch": 100, "memory": 36824, "step": 31187}
{"lr": 0.0015755565102931115, "data_time": 0.002649378776550293, "grad_norm": 0.17122355699539185, "loss": 0.48848003447055816, "time": 2.8995813608169554, "epoch": 100, "memory": 36824, "step": 31287}
{"lr": 0.0015752627807604897, "data_time": 1.1497643709182739, "grad_norm": 0.18067167103290557, "loss": 0.49321954548358915, "time": 2.857913684844971, "epoch": 101, "memory": 36824, "step": 31400}
{"lr": 0.0015750014030341513, "data_time": 0.5779263973236084, "grad_norm": 0.17432174533605577, "loss": 0.494815793633461, "time": 2.8084349155426027, "epoch": 101, "memory": 36824, "step": 31500}
{"lr": 0.0015747386735891471, "data_time": 0.003120708465576172, "grad_norm": 0.1579004555940628, "loss": 0.4972172319889069, "time": 2.8224162817001344, "epoch": 101, "memory": 36824, "step": 31600}
{"lr": 0.0015744401631659242, "data_time": 0.9689062118530274, "grad_norm": 0.14549268558621406, "loss": 0.49426826536655427, "time": 2.80120313167572, "epoch": 102, "memory": 36824, "step": 31713}
{"lr": 0.001574174556090637, "data_time": 0.47012465000152587, "grad_norm": 0.16624009758234023, "loss": 0.48952086865901945, "time": 2.8464616775512694, "epoch": 102, "memory": 36824, "step": 31813}
{"lr": 0.0015739075987388287, "data_time": 0.003126716613769531, "grad_norm": 0.14790882244706155, "loss": 0.4887158662080765, "time": 2.8307447910308836, "epoch": 102, "memory": 36824, "step": 31913}
{"lr": 0.0015736043125257261, "data_time": 1.0212048530578612, "grad_norm": 0.15744113475084304, "loss": 0.49178656935691833, "time": 2.8344753980636597, "epoch": 103, "memory": 36824, "step": 32026}
{"lr": 0.0015733344806399814, "data_time": 0.39340736865997317, "grad_norm": 0.14662388116121292, "loss": 0.4923175185918808, "time": 3.2906365394592285, "epoch": 103, "memory": 36824, "step": 32126}
{"lr": 0.0015730632999429327, "data_time": 0.0026900053024291994, "grad_norm": 0.16878741085529328, "loss": 0.48894629180431365, "time": 2.272587013244629, "epoch": 103, "memory": 36824, "step": 32226}
{"lr": 0.0015727552431222711, "data_time": 1.0582629442214966, "grad_norm": 0.1411567859351635, "loss": 0.49292066097259524, "time": 2.7974571228027343, "epoch": 104, "memory": 36824, "step": 32339}
{"lr": 0.0015724811910367506, "data_time": 0.5439557075500489, "grad_norm": 0.13859800025820732, "loss": 0.4904772639274597, "time": 2.7720304489135743, "epoch": 104, "memory": 36824, "step": 32439}
{"lr": 0.0015722057916281902, "data_time": 0.11765518188476562, "grad_norm": 0.15005693212151527, "loss": 0.49262738823890684, "time": 3.0837355852127075, "epoch": 104, "memory": 36824, "step": 32539}
{"lr": 0.0015718929694638136, "data_time": 1.0142081260681153, "grad_norm": 0.14484327509999276, "loss": 0.49110823273658755, "time": 2.7938563585281373, "epoch": 105, "memory": 36824, "step": 32652}
{"lr": 0.0015716147018613117, "data_time": 0.540821123123169, "grad_norm": 0.18489229306578636, "loss": 0.49149995744228364, "time": 2.797251582145691, "epoch": 105, "memory": 36824, "step": 32752}
{"lr": 0.0015713350884470588, "data_time": 0.003706550598144531, "grad_norm": 0.15046572536230088, "loss": 0.49223125278949736, "time": 2.7587390422821043, "epoch": 105, "memory": 36824, "step": 32852}
{"lr": 0.0015710175062842355, "data_time": 0.9952075242996216, "grad_norm": 0.14476484730839728, "loss": 0.4908213376998901, "time": 2.7748960494995116, "epoch": 106, "memory": 36824, "step": 32965}
{"lr": 0.0015707350279195783, "data_time": 0.26813042163848877, "grad_norm": 0.16497597247362136, "loss": 0.493675684928894, "time": 2.7625256538391114, "epoch": 106, "memory": 36824, "step": 33065}
{"lr": 0.0015704512052774529, "data_time": 0.003152203559875488, "grad_norm": 0.1491771548986435, "loss": 0.49300563633441924, "time": 2.8020726680755614, "epoch": 106, "memory": 36824, "step": 33165}
{"lr": 0.0015701288685427857, "data_time": 0.8197510004043579, "grad_norm": 0.15297755524516105, "loss": 0.48801523447036743, "time": 2.8065322160720827, "epoch": 107, "memory": 36824, "step": 33278}
{"lr": 0.0015698421842427503, "data_time": 0.640846562385559, "grad_norm": 0.15736745446920394, "loss": 0.4899230420589447, "time": 2.771300959587097, "epoch": 107, "memory": 36824, "step": 33378}
{"lr": 0.001569554157222498, "data_time": 0.17615203857421874, "grad_norm": 0.15235378742218017, "loss": 0.4921516329050064, "time": 2.7791481256484984, "epoch": 107, "memory": 36824, "step": 33478}
{"lr": 0.0015692270714238311, "data_time": 1.0638689517974853, "grad_norm": 0.14779627695679665, "loss": 0.4888742804527283, "time": 2.7762194156646727, "epoch": 108, "memory": 36824, "step": 33591}
{"lr": 0.0015689361860870593, "data_time": 0.553639030456543, "grad_norm": 0.15086545720696448, "loss": 0.4920086055994034, "time": 2.8345606565475463, "epoch": 108, "memory": 36824, "step": 33691}
{"lr": 0.001568643959610267, "data_time": 0.002556252479553223, "grad_norm": 0.1704072967171669, "loss": 0.49283936619758606, "time": 2.7418970346450804, "epoch": 108, "memory": 36824, "step": 33791}
{"lr": 0.0015683121303365947, "data_time": 0.8474687337875366, "grad_norm": 0.13973002806305884, "loss": 0.4931688904762268, "time": 2.7325610160827636, "epoch": 109, "memory": 36824, "step": 33904}
{"lr": 0.0015680170489335127, "data_time": 0.43222181797027587, "grad_norm": 0.14420052766799926, "loss": 0.4918191730976105, "time": 2.7378206968307497, "epoch": 109, "memory": 36824, "step": 34004}
{"lr": 0.0015677206279935247, "data_time": 0.0026584625244140624, "grad_norm": 0.16287730187177657, "loss": 0.4941539913415909, "time": 2.9186709403991697, "epoch": 109, "memory": 36824, "step": 34104}
{"lr": 0.0015673840609148931, "data_time": 1.1037194013595581, "grad_norm": 0.14383596926927567, "loss": 0.49182225167751314, "time": 2.796754240989685, "epoch": 110, "memory": 36824, "step": 34217}
{"lr": 0.0015670847884876316, "data_time": 0.43651442527770995, "grad_norm": 0.1759423792362213, "loss": 0.48979533314704893, "time": 2.7517237186431887, "epoch": 110, "memory": 36824, "step": 34317}
{"lr": 0.0015667841781494669, "data_time": 0.0029807806015014647, "grad_norm": 0.17183827236294746, "loss": 0.493230465054512, "time": 2.7824597358703613, "epoch": 110, "memory": 36824, "step": 34417}
{"lr": 0.0015664428790168803, "data_time": 1.467151117324829, "grad_norm": 0.15612845122814178, "loss": 0.49440919756889345, "time": 3.438722085952759, "epoch": 111, "memory": 36824, "step": 34530}
{"lr": 0.0015661394206791773, "data_time": 0.002234363555908203, "grad_norm": 0.15007721185684203, "loss": 0.49099906384944914, "time": 2.291109561920166, "epoch": 111, "memory": 36824, "step": 34630}
{"lr": 0.0015658346260794345, "data_time": 0.003117012977600098, "grad_norm": 0.14113682731986046, "loss": 0.48663744032382966, "time": 2.823586845397949, "epoch": 111, "memory": 36824, "step": 34730}
{"lr": 0.0015654886007247524, "data_time": 1.0724066734313964, "grad_norm": 0.1454206496477127, "loss": 0.49173533618450166, "time": 2.761855864524841, "epoch": 112, "memory": 36824, "step": 34843}
{"lr": 0.0015651809616618734, "data_time": 0.4558589935302734, "grad_norm": 0.14503588825464248, "loss": 0.48999080061912537, "time": 2.6714900732040405, "epoch": 112, "memory": 36824, "step": 34943}
{"lr": 0.0015648719880086489, "data_time": 0.0025500059127807617, "grad_norm": 0.13782546445727348, "loss": 0.49756703674793246, "time": 2.702514815330505, "epoch": 112, "memory": 36824, "step": 35043}
{"lr": 0.00156452124234449, "data_time": 0.7646732568740845, "grad_norm": 0.14401569366455078, "loss": 0.48705486953258514, "time": 2.8017810344696046, "epoch": 113, "memory": 36824, "step": 35156}
{"lr": 0.001564209427813138, "data_time": 0.0603771448135376, "grad_norm": 0.144423408806324, "loss": 0.4914714366197586, "time": 2.7779088020324707, "epoch": 113, "memory": 36824, "step": 35256}
{"lr": 0.001563896280385942, "data_time": 0.003033280372619629, "grad_norm": 0.13693142160773278, "loss": 0.4911514908075333, "time": 2.7854737520217894, "epoch": 113, "memory": 36824, "step": 35356}
{"lr": 0.0015635408204055816, "data_time": 1.0786246538162232, "grad_norm": 0.1720990628004074, "loss": 0.49079001545906065, "time": 2.749859642982483, "epoch": 114, "memory": 36824, "step": 35469}
{"lr": 0.001563224835733807, "data_time": 0.5476625919342041, "grad_norm": 0.15005628392100334, "loss": 0.4906650364398956, "time": 2.7383474111557007, "epoch": 114, "memory": 36824, "step": 35569}
{"lr": 0.0015629075198834656, "data_time": 0.3241292953491211, "grad_norm": 0.1503075808286667, "loss": 0.48997305929660795, "time": 2.9800456285476686, "epoch": 114, "memory": 36824, "step": 35669}
{"lr": 0.0015625473516607332, "data_time": 1.442792010307312, "grad_norm": 0.1636952869594097, "loss": 0.489547199010849, "time": 2.8061019659042357, "epoch": 115, "memory": 36824, "step": 35782}
{"lr": 0.001562227202247841, "data_time": 0.7336848974227905, "grad_norm": 0.1570928104221821, "loss": 0.4866767734289169, "time": 2.7894354820251466, "epoch": 115, "memory": 36824, "step": 35882}
{"lr": 0.0015619057233964054, "data_time": 0.0032253265380859375, "grad_norm": 0.14467575773596764, "loss": 0.4916429340839386, "time": 2.9624289751052855, "epoch": 115, "memory": 36824, "step": 35982}
{"lr": 0.0015615408530855867, "data_time": 1.6890657663345336, "grad_norm": 0.14827705174684525, "loss": 0.491574889421463, "time": 3.5355298042297365, "epoch": 116, "memory": 36824, "step": 36095}
{"lr": 0.0015612165444020495, "data_time": 0.5720157146453857, "grad_norm": 0.1404827944934368, "loss": 0.49389269053936, "time": 2.871482253074646, "epoch": 116, "memory": 36824, "step": 36195}
{"lr": 0.0015608909080427056, "data_time": 0.002921867370605469, "grad_norm": 0.15490140691399573, "loss": 0.4880969822406769, "time": 3.296871781349182, "epoch": 116, "memory": 36824, "step": 36295}
{"lr": 0.001560521341878422, "data_time": 1.086503553390503, "grad_norm": 0.18553740978240968, "loss": 0.4924645900726318, "time": 2.8108267784118652, "epoch": 117, "memory": 36824, "step": 36408}
{"lr": 0.0015601928794657805, "data_time": 1.0525023460388183, "grad_norm": 0.14170809984207153, "loss": 0.4880115926265717, "time": 3.5053815841674805, "epoch": 117, "memory": 36824, "step": 36508}
{"lr": 0.0015598630911627571, "data_time": 0.002642369270324707, "grad_norm": 0.14492422938346863, "loss": 0.490095391869545, "time": 2.67588050365448, "epoch": 117, "memory": 36824, "step": 36608}
{"lr": 0.0015594888354598776, "data_time": 0.9154970169067382, "grad_norm": 0.17030302733182906, "loss": 0.4944774955511093, "time": 2.714861011505127, "epoch": 118, "memory": 36824, "step": 36721}
{"lr": 0.0015591562249306448, "data_time": 0.12345798015594482, "grad_norm": 0.1585806630551815, "loss": 0.4913987725973129, "time": 2.773222231864929, "epoch": 118, "memory": 36824, "step": 36821}
{"lr": 0.0015588222903191132, "data_time": 0.0026612281799316406, "grad_norm": 0.15911894887685776, "loss": 0.4869822829961777, "time": 2.853984332084656, "epoch": 118, "memory": 36824, "step": 36921}
{"lr": 0.001558443351472635, "data_time": 0.4575615644454956, "grad_norm": 0.1759224057197571, "loss": 0.4906124800443649, "time": 2.3974231481552124, "epoch": 119, "memory": 36824, "step": 37034}
{"lr": 0.0015581065985102104, "data_time": 0.48951129913330077, "grad_norm": 0.1558332063257694, "loss": 0.4890674531459808, "time": 2.728988218307495, "epoch": 119, "memory": 36824, "step": 37134}
{"lr": 0.001557768523296191, "data_time": 0.004034590721130371, "grad_norm": 0.17229796946048737, "loss": 0.49089682698249815, "time": 3.468965697288513, "epoch": 119, "memory": 36824, "step": 37234}
{"lr": 0.001557384907781133, "data_time": 0.7846734762191773, "grad_norm": 0.14782005324959754, "loss": 0.4907799273729324, "time": 2.713718795776367, "epoch": 120, "memory": 36824, "step": 37347}
{"lr": 0.0015570440181396936, "data_time": 0.01547846794128418, "grad_norm": 0.16355260983109474, "loss": 0.49243377447128295, "time": 2.695782732963562, "epoch": 120, "memory": 36824, "step": 37447}
{"lr": 0.0015567018080999587, "data_time": 0.0029366731643676756, "grad_norm": 0.13527385741472245, "loss": 0.4844296485185623, "time": 2.7090261697769167, "epoch": 120, "memory": 36824, "step": 37547}
{"lr": 0.0015563135224712485, "data_time": 1.0982057332992554, "grad_norm": 0.18725989013910294, "loss": 0.4901551783084869, "time": 2.793769598007202, "epoch": 121, "memory": 36824, "step": 37660}
{"lr": 0.0015559685019756581, "data_time": 0.5928291559219361, "grad_norm": 0.13316941633820534, "loss": 0.4954206794500351, "time": 2.6818819522857664, "epoch": 121, "memory": 36824, "step": 37760}
{"lr": 0.0015556221629576337, "data_time": 0.0031507730484008787, "grad_norm": 0.14918614849448203, "loss": 0.48742237985134124, "time": 2.764302659034729, "epoch": 121, "memory": 36824, "step": 37860}
{"lr": 0.0015552292138500047, "data_time": 1.363974928855896, "grad_norm": 0.14655452966690063, "loss": 0.4917866766452789, "time": 2.7613006591796876, "epoch": 122, "memory": 36824, "step": 37973}
{"lr": 0.0015548800683957114, "data_time": 1.1574882984161377, "grad_norm": 0.13514941260218621, "loss": 0.49478923380374906, "time": 2.93781316280365, "epoch": 122, "memory": 36824, "step": 38073}
{"lr": 0.0015545296063173765, "data_time": 0.5173548221588135, "grad_norm": 0.15784654542803764, "loss": 0.491006064414978, "time": 2.923956561088562, "epoch": 122, "memory": 36824, "step": 38173}
{"lr": 0.0015541320004452456, "data_time": 1.424489688873291, "grad_norm": 0.14643418192863464, "loss": 0.4891704201698303, "time": 2.8617273330688477, "epoch": 123, "memory": 36824, "step": 38286}
{"lr": 0.0015537787359981847, "data_time": 0.9309105157852173, "grad_norm": 0.15283529832959175, "loss": 0.48668522834777833, "time": 2.6821390867233275, "epoch": 123, "memory": 36824, "step": 38386}
{"lr": 0.0015534241568479702, "data_time": 0.37134177684783937, "grad_norm": 0.14273309260606765, "loss": 0.4925883233547211, "time": 2.7516437292099, "epoch": 123, "memory": 36824, "step": 38486}
{"lr": 0.0015530219010053239, "data_time": 1.1084468364715576, "grad_norm": 0.15040176436305047, "loss": 0.4906671971082687, "time": 2.7214481592178346, "epoch": 124, "memory": 36824, "step": 38599}
{"lr": 0.0015526645236018074, "data_time": 0.6983120918273926, "grad_norm": 0.17096619307994843, "loss": 0.49051032960414886, "time": 2.739987540245056, "epoch": 124, "memory": 36824, "step": 38699}
{"lr": 0.0015523058334384909, "data_time": 0.0039003372192382814, "grad_norm": 0.17812639325857163, "loss": 0.4902894586324692, "time": 2.1810489654541017, "epoch": 124, "memory": 36824, "step": 38799}
{"lr": 0.0015518989344987708, "data_time": 1.2722625017166138, "grad_norm": 0.14397543519735337, "loss": 0.49196955263614656, "time": 3.371038317680359, "epoch": 125, "memory": 36824, "step": 38912}
{"lr": 0.001551537450245393, "data_time": 0.13818349838256835, "grad_norm": 0.15368214175105094, "loss": 0.4928911864757538, "time": 2.7361132383346556, "epoch": 125, "memory": 36824, "step": 39012}
{"lr": 0.0015511746551979996, "data_time": 0.003038477897644043, "grad_norm": 0.1525849349796772, "loss": 0.4884998381137848, "time": 2.520603322982788, "epoch": 125, "memory": 36824, "step": 39112}
{"lr": 0.0015507631201139866, "data_time": 1.469706916809082, "grad_norm": 0.14641588777303696, "loss": 0.488456004858017, "time": 2.8326839208602905, "epoch": 126, "memory": 36824, "step": 39225}
{"lr": 0.0015503975351875154, "data_time": 1.1782893419265748, "grad_norm": 0.1636376142501831, "loss": 0.4847003728151321, "time": 3.3850094795227053, "epoch": 126, "memory": 36824, "step": 39325}
{"lr": 0.0015500306414552118, "data_time": 0.034307456016540526, "grad_norm": 0.15218326896429063, "loss": 0.4936488002538681, "time": 2.6811028003692625, "epoch": 126, "memory": 36824, "step": 39425}
{"lr": 0.0015496144772589046, "data_time": 1.000523042678833, "grad_norm": 0.15379230976104735, "loss": 0.4914723873138428, "time": 2.7399551630020142, "epoch": 127, "memory": 36824, "step": 39538}
{"lr": 0.001549244797906178, "data_time": 0.2871047258377075, "grad_norm": 0.13519426062703133, "loss": 0.4885978728532791, "time": 2.832485890388489, "epoch": 127, "memory": 36824, "step": 39638}
{"lr": 0.0015488738117581673, "data_time": 0.0032159566879272463, "grad_norm": 0.1516407735645771, "loss": 0.4910569220781326, "time": 2.7411222219467164, "epoch": 127, "memory": 36824, "step": 39738}
{"lr": 0.0015484530255606636, "data_time": 1.3541079998016357, "grad_norm": 0.14479494839906693, "loss": 0.49140428602695463, "time": 2.9688896894454957, "epoch": 128, "memory": 36824, "step": 39851}
{"lr": 0.001548079258098483, "data_time": 0.6122367143630981, "grad_norm": 0.13985482677817346, "loss": 0.4903041243553162, "time": 2.904416561126709, "epoch": 128, "memory": 36824, "step": 39951}
{"lr": 0.0015477041858738929, "data_time": 0.002540135383605957, "grad_norm": 0.1437758296728134, "loss": 0.4886784374713898, "time": 2.813563680648804, "epoch": 128, "memory": 36824, "step": 40051}
{"lr": 0.001547278784865268, "data_time": 0.05050129890441894, "grad_norm": 0.14802079722285272, "loss": 0.4879016369581223, "time": 1.8402687788009644, "epoch": 129, "memory": 36824, "step": 40164}
{"lr": 0.001546900935680286, "data_time": 0.002346515655517578, "grad_norm": 0.15110916197299956, "loss": 0.4913953423500061, "time": 1.9046396017074585, "epoch": 129, "memory": 36824, "step": 40264}
{"lr": 0.0015465217837880658, "data_time": 0.003915262222290039, "grad_norm": 0.13880405500531195, "loss": 0.49158152639865876, "time": 1.8528555154800415, "epoch": 129, "memory": 36824, "step": 40364}
{"lr": 0.0015460917752372493, "data_time": 0.2539195537567139, "grad_norm": 0.17168120369315149, "loss": 0.48995089828968047, "time": 1.89880268573761, "epoch": 130, "memory": 36824, "step": 40477}
{"lr": 0.0015457098507858666, "data_time": 0.0023331642150878906, "grad_norm": 0.13142124935984612, "loss": 0.48997806310653685, "time": 1.9419597387313843, "epoch": 130, "memory": 36824, "step": 40577}
{"lr": 0.0015453266257046772, "data_time": 0.0032972097396850586, "grad_norm": 0.15726147964596748, "loss": 0.4877583563327789, "time": 1.8777299165725707, "epoch": 130, "memory": 36824, "step": 40677}
{"lr": 0.0015448920169593304, "data_time": 0.4365863800048828, "grad_norm": 0.1630852647125721, "loss": 0.49022970497608187, "time": 1.890430736541748, "epoch": 131, "memory": 36824, "step": 40790}
{"lr": 0.001544506023767582, "data_time": 0.002379631996154785, "grad_norm": 0.1580561578273773, "loss": 0.48679582178592684, "time": 1.8695910930633546, "epoch": 131, "memory": 36824, "step": 40890}
{"lr": 0.0015441187320456825, "data_time": 0.00343778133392334, "grad_norm": 0.16640733927488327, "loss": 0.4911778837442398, "time": 1.885295844078064, "epoch": 131, "memory": 36824, "step": 40990}
{"lr": 0.0015436795305320673, "data_time": 0.287910008430481, "grad_norm": 0.16087525114417076, "loss": 0.4886843621730804, "time": 1.8224013090133666, "epoch": 132, "memory": 36824, "step": 41103}
{"lr": 0.0015432894751955136, "data_time": 0.002197599411010742, "grad_norm": 0.14300960302352905, "loss": 0.48878253996372223, "time": 1.8303441286087037, "epoch": 132, "memory": 36824, "step": 41203}
{"lr": 0.0015428981234506503, "data_time": 0.0037034273147583006, "grad_norm": 0.16893597319722176, "loss": 0.4879129439592361, "time": 2.141610097885132, "epoch": 132, "memory": 36824, "step": 41303}
{"lr": 0.00154245433667351, "data_time": 0.27598938941955564, "grad_norm": 0.15318345949053763, "loss": 0.4887199610471725, "time": 1.838233470916748, "epoch": 133, "memory": 36824, "step": 41416}
{"lr": 0.0015420602258571197, "data_time": 0.0027457475662231445, "grad_norm": 0.1418866202235222, "loss": 0.4884929805994034, "time": 1.8630084991455078, "epoch": 133, "memory": 36824, "step": 41516}
{"lr": 0.0015416648207764174, "data_time": 0.0026497840881347656, "grad_norm": 0.14375557899475097, "loss": 0.4861627072095871, "time": 1.776255989074707, "epoch": 133, "memory": 36824, "step": 41616}
{"lr": 0.0015412164563188425, "data_time": 0.3438747406005859, "grad_norm": 0.16472129821777343, "loss": 0.48852753043174746, "time": 1.894106388092041, "epoch": 134, "memory": 36824, "step": 41729}
{"lr": 0.0015408182967568804, "data_time": 0.002224898338317871, "grad_norm": 0.15158528834581375, "loss": 0.48403508961200714, "time": 1.8921716928482055, "epoch": 134, "memory": 36824, "step": 41829}
{"lr": 0.0015404188450967166, "data_time": 0.0025879383087158204, "grad_norm": 0.13706096336245538, "loss": 0.49037293195724485, "time": 2.0601258754730223, "epoch": 134, "memory": 36824, "step": 41929}
{"lr": 0.0015399659106200216, "data_time": 0.002633547782897949, "grad_norm": 0.1317431464791298, "loss": 0.493068066239357, "time": 1.8491318225860596, "epoch": 135, "memory": 36824, "step": 42042}
{"lr": 0.0015395637091159375, "data_time": 0.002366495132446289, "grad_norm": 0.15689056664705275, "loss": 0.4905002623796463, "time": 1.851943016052246, "epoch": 135, "memory": 36824, "step": 42142}
{"lr": 0.0015391602177018402, "data_time": 0.0036581277847290037, "grad_norm": 0.16871990635991096, "loss": 0.4899961829185486, "time": 1.8390794277191163, "epoch": 135, "memory": 36824, "step": 42242}
{"lr": 0.0015387027209454217, "data_time": 0.298089599609375, "grad_norm": 0.13380712196230887, "loss": 0.48454039096832274, "time": 1.8608761072158813, "epoch": 136, "memory": 36824, "step": 42355}
{"lr": 0.0015382964843717315, "data_time": 0.00234067440032959, "grad_norm": 0.1483435869216919, "loss": 0.48611031472682953, "time": 1.8695741415023803, "epoch": 136, "memory": 36824, "step": 42455}
{"lr": 0.0015378889600982544, "data_time": 0.002693319320678711, "grad_norm": 0.1655420996248722, "loss": 0.49179061949253083, "time": 1.8737166404724122, "epoch": 136, "memory": 36824, "step": 42555}
{"lr": 0.0015374269088794716, "data_time": 0.47337181568145753, "grad_norm": 0.14861202910542487, "loss": 0.49091372787952425, "time": 1.8464950799942017, "epoch": 137, "memory": 36824, "step": 42668}
{"lr": 0.0015370166441776348, "data_time": 0.0027909278869628906, "grad_norm": 0.14360415637493135, "loss": 0.486488077044487, "time": 1.831339168548584, "epoch": 137, "memory": 36824, "step": 42768}
{"lr": 0.0015366050940082464, "data_time": 0.002673649787902832, "grad_norm": 0.14748450294137, "loss": 0.48746935427188876, "time": 1.889861273765564, "epoch": 137, "memory": 36824, "step": 42868}
{"lr": 0.0015361384962222795, "data_time": 0.2438112497329712, "grad_norm": 0.12808237448334694, "loss": 0.48990482091903687, "time": 1.8204386711120606, "epoch": 138, "memory": 36824, "step": 42981}
{"lr": 0.0015357242104025873, "data_time": 0.0024124622344970704, "grad_norm": 0.13856837525963783, "loss": 0.482435867190361, "time": 1.9109686374664308, "epoch": 138, "memory": 36824, "step": 43081}
{"lr": 0.001535308641369542, "data_time": 0.0033507585525512696, "grad_norm": 0.1522465616464615, "loss": 0.48812333047389983, "time": 1.8805259943008423, "epoch": 138, "memory": 36824, "step": 43181}
{"lr": 0.0015348375049892603, "data_time": 0.002354121208190918, "grad_norm": 0.1917426034808159, "loss": 0.48789175152778624, "time": 1.9116263151168824, "epoch": 139, "memory": 36824, "step": 43294}
{"lr": 0.0015344192051307158, "data_time": 0.0027731895446777345, "grad_norm": 0.13875994011759757, "loss": 0.4848204642534256, "time": 1.8286206483840943, "epoch": 139, "memory": 36824, "step": 43394}
{"lr": 0.001533999624334944, "data_time": 0.0025690793991088867, "grad_norm": 0.14356564953923226, "loss": 0.49083044826984407, "time": 1.8841646671295167, "epoch": 139, "memory": 36824, "step": 43494}
{"lr": 0.0015335239574107678, "data_time": 0.2757513999938965, "grad_norm": 0.13970597237348556, "loss": 0.48778065145015714, "time": 1.871006679534912, "epoch": 140, "memory": 36824, "step": 43607}
{"lr": 0.0015331016506609624, "data_time": 0.0022126197814941405, "grad_norm": 0.1404695764183998, "loss": 0.491223019361496, "time": 2.1171541929244997, "epoch": 140, "memory": 36824, "step": 43707}
{"lr": 0.0015326780652719398, "data_time": 0.0033042430877685547, "grad_norm": 0.14126549288630486, "loss": 0.4843032002449036, "time": 1.8763181924819947, "epoch": 140, "memory": 36824, "step": 43807}
{"lr": 0.0015321978759317045, "data_time": 0.33174197673797606, "grad_norm": 0.14100070372223855, "loss": 0.4910853773355484, "time": 1.8806027889251709, "epoch": 141, "memory": 36824, "step": 43920}
{"lr": 0.001531771569506694, "data_time": 0.002278780937194824, "grad_norm": 0.14349483624100684, "loss": 0.4877608746290207, "time": 1.7851258039474487, "epoch": 141, "memory": 36824, "step": 44020}
{"lr": 0.0015313439867623302, "data_time": 0.002513742446899414, "grad_norm": 0.1559663861989975, "loss": 0.4926172852516174, "time": 1.8515422821044922, "epoch": 141, "memory": 36824, "step": 44120}
{"lr": 0.0015308592832111478, "data_time": 0.2052889347076416, "grad_norm": 0.1570497915148735, "loss": 0.4899255156517029, "time": 1.8904024839401246, "epoch": 142, "memory": 36824, "step": 44233}
{"lr": 0.001530428984395331, "data_time": 0.002253437042236328, "grad_norm": 0.13069605007767676, "loss": 0.48743380308151246, "time": 2.1044310331344604, "epoch": 142, "memory": 36824, "step": 44333}
{"lr": 0.0015299974116018307, "data_time": 0.0033878326416015626, "grad_norm": 0.1586444541811943, "loss": 0.48754796087741853, "time": 1.8352012634277344, "epoch": 142, "memory": 36824, "step": 44433}
{"lr": 0.001529508202121946, "data_time": 0.002450299263000488, "grad_norm": 0.14584071785211564, "loss": 0.4905666261911392, "time": 1.8644546031951905, "epoch": 143, "memory": 36824, "step": 44546}
{"lr": 0.0015290739182679458, "data_time": 0.0022096633911132812, "grad_norm": 0.1710829347372055, "loss": 0.4874151736497879, "time": 1.8416638612747191, "epoch": 143, "memory": 36824, "step": 44646}
{"lr": 0.0015286383627996973, "data_time": 0.002867460250854492, "grad_norm": 0.14005928561091424, "loss": 0.4854299157857895, "time": 1.84623544216156, "epoch": 143, "memory": 36824, "step": 44746}
{"lr": 0.00152814465575035, "data_time": 0.10106110572814941, "grad_norm": 0.13845158442854882, "loss": 0.485109880566597, "time": 1.8925172090530396, "epoch": 144, "memory": 36824, "step": 44859}
{"lr": 0.0015277063942788773, "data_time": 0.0023481130599975588, "grad_norm": 0.15670515075325966, "loss": 0.4849437981843948, "time": 1.817297601699829, "epoch": 144, "memory": 36824, "step": 44959}
{"lr": 0.0015272668635783256, "data_time": 0.002535104751586914, "grad_norm": 0.18436166644096375, "loss": 0.4886041134595871, "time": 1.8827341318130493, "epoch": 144, "memory": 36824, "step": 45059}
{"lr": 0.0015267686673956081, "data_time": 0.06741914749145508, "grad_norm": 0.15033795759081842, "loss": 0.492413330078125, "time": 1.8584969758987426, "epoch": 145, "memory": 36824, "step": 45172}
{"lr": 0.001526326435795343, "data_time": 0.002450251579284668, "grad_norm": 0.14708545207977294, "loss": 0.4892513781785965, "time": 1.8170949697494507, "epoch": 145, "memory": 36824, "step": 45272}
{"lr": 0.0015258829373728528, "data_time": 0.0036326885223388673, "grad_norm": 0.15355764999985694, "loss": 0.4833893060684204, "time": 1.8412903785705566, "epoch": 145, "memory": 36824, "step": 45372}
{"lr": 0.0015253802605695635, "data_time": 0.3181540727615356, "grad_norm": 0.15614101141691208, "loss": 0.4866273760795593, "time": 1.831234860420227, "epoch": 146, "memory": 36824, "step": 45485}
{"lr": 0.0015249340663970256, "data_time": 0.002190852165222168, "grad_norm": 0.13988805562257767, "loss": 0.48963678181171416, "time": 1.844074010848999, "epoch": 146, "memory": 36824, "step": 45585}
{"lr": 0.0015244866078307632, "data_time": 0.0027966022491455076, "grad_norm": 0.13430682197213173, "loss": 0.48724083602428436, "time": 1.8489123344421388, "epoch": 146, "memory": 36824, "step": 45685}
{"lr": 0.001523979458996261, "data_time": 0.08336608409881592, "grad_norm": 0.14244548305869104, "loss": 0.4897909164428711, "time": 1.8344601154327393, "epoch": 147, "memory": 36824, "step": 45798}
{"lr": 0.001523529309875673, "data_time": 0.0024289846420288085, "grad_norm": 0.1382970467209816, "loss": 0.48452170491218566, "time": 1.9322899103164672, "epoch": 147, "memory": 36824, "step": 45898}
{"lr": 0.0015230778988114693, "data_time": 0.0032564640045166016, "grad_norm": 0.17464014887809753, "loss": 0.49111218750476837, "time": 1.8216176748275756, "epoch": 147, "memory": 36824, "step": 45998}
{"lr": 0.0015225662866115302, "data_time": 0.166633939743042, "grad_norm": 0.14845166578888894, "loss": 0.48681714236736295, "time": 1.943726396560669, "epoch": 148, "memory": 36824, "step": 46111}
{"lr": 0.001522112190234701, "data_time": 0.0022585391998291016, "grad_norm": 0.15836238265037536, "loss": 0.49255080223083497, "time": 1.8063243627548218, "epoch": 148, "memory": 36824, "step": 46211}
{"lr": 0.001521656834385924, "data_time": 0.003346085548400879, "grad_norm": 0.14044601321220399, "loss": 0.4888848692178726, "time": 1.915196681022644, "epoch": 148, "memory": 36824, "step": 46311}
{"lr": 0.0015211407675625882, "data_time": 0.0028551101684570314, "grad_norm": 0.14274937510490418, "loss": 0.48955962657928465, "time": 1.819384789466858, "epoch": 149, "memory": 36824, "step": 46424}
{"lr": 0.0015206827316887731, "data_time": 0.002455306053161621, "grad_norm": 0.13856853768229485, "loss": 0.48750282526016236, "time": 1.8367114782333374, "epoch": 149, "memory": 36824, "step": 46524}
{"lr": 0.0015202234388361985, "data_time": 0.0032564401626586914, "grad_norm": 0.15530907437205316, "loss": 0.4913801282644272, "time": 1.9376479387283325, "epoch": 149, "memory": 36824, "step": 46624}
{"lr": 0.0015197029262076227, "data_time": 0.3992577314376831, "grad_norm": 0.1438778430223465, "loss": 0.49031619131565096, "time": 1.8449979066848754, "epoch": 150, "memory": 36824, "step": 46737}
{"lr": 0.001519240958663388, "data_time": 0.026536107063293457, "grad_norm": 0.1544604130089283, "loss": 0.4896046847105026, "time": 1.880551314353943, "epoch": 150, "memory": 36824, "step": 46837}
{"lr": 0.0015187777366550655, "data_time": 0.12178430557250977, "grad_norm": 0.13836592435836792, "loss": 0.4850200802087784, "time": 1.841142463684082, "epoch": 150, "memory": 36824, "step": 46937}
{"lr": 0.001518252787115375, "data_time": 0.18673527240753174, "grad_norm": 0.1339181587100029, "loss": 0.48766532838344573, "time": 1.8588060855865478, "epoch": 151, "memory": 36824, "step": 47050}
{"lr": 0.0015177868957944752, "data_time": 0.0024001598358154297, "grad_norm": 0.16195552051067352, "loss": 0.48591889142990113, "time": 1.9025407552719116, "epoch": 151, "memory": 36824, "step": 47150}
{"lr": 0.0015173197525455893, "data_time": 0.0026614189147949217, "grad_norm": 0.15344600528478622, "loss": 0.4836051046848297, "time": 1.8897766828536988, "epoch": 151, "memory": 36824, "step": 47250}
{"lr": 0.001516790375064723, "data_time": 0.27097060680389407, "grad_norm": 0.14967898204922675, "loss": 0.4847033143043518, "time": 1.863537645339966, "epoch": 152, "memory": 36824, "step": 47363}
{"lr": 0.0015163205679279536, "data_time": 0.0025435924530029298, "grad_norm": 0.1315729059278965, "loss": 0.490524485707283, "time": 1.9995001792907714, "epoch": 152, "memory": 36824, "step": 47463}
{"lr": 0.0015158495114206937, "data_time": 0.0025634288787841795, "grad_norm": 0.13142106607556342, "loss": 0.48757983446121217, "time": 1.901903247833252, "epoch": 152, "memory": 36824, "step": 47563}
{"lr": 0.0015153157150442553, "data_time": 0.16482999324798583, "grad_norm": 0.15857459008693695, "loss": 0.4893289625644684, "time": 1.975641679763794, "epoch": 153, "memory": 36824, "step": 47676}
{"lr": 0.0015148420001193255, "data_time": 0.0024301767349243163, "grad_norm": 0.14499666690826415, "loss": 0.48548783659934996, "time": 2.0799516677856444, "epoch": 153, "memory": 36824, "step": 47776}
{"lr": 0.0015143670384027475, "data_time": 0.003174614906311035, "grad_norm": 0.13393393978476525, "loss": 0.4861748874187469, "time": 1.907805848121643, "epoch": 153, "memory": 36824, "step": 47876}
{"lr": 0.0015138288322518413, "data_time": 0.31763644218444825, "grad_norm": 0.13858768120408058, "loss": 0.48900127708911895, "time": 1.907983660697937, "epoch": 154, "memory": 36824, "step": 47989}
{"lr": 0.0015133512176332311, "data_time": 0.0022734642028808595, "grad_norm": 0.15956633538007736, "loss": 0.4860225111246109, "time": 1.9212674856185914, "epoch": 154, "memory": 36824, "step": 48089}
{"lr": 0.0015128723588231216, "data_time": 0.0026600837707519533, "grad_norm": 0.13842366486787797, "loss": 0.48505147397518156, "time": 1.9056445360183716, "epoch": 154, "memory": 36824, "step": 48189}
{"lr": 0.001512329752094206, "data_time": 0.11774663925170899, "grad_norm": 0.14075250700116157, "loss": 0.48926686942577363, "time": 1.8930294275283814, "epoch": 155, "memory": 36824, "step": 48302}
{"lr": 0.001511848245943029, "data_time": 0.0025348424911499023, "grad_norm": 0.14855347573757172, "loss": 0.49011043906211854, "time": 1.8949830055236816, "epoch": 155, "memory": 36824, "step": 48402}
{"lr": 0.0015113654982217616, "data_time": 0.002679252624511719, "grad_norm": 0.14190488159656525, "loss": 0.4886066228151321, "time": 2.0348666667938233, "epoch": 155, "memory": 36824, "step": 48502}
{"lr": 0.0015108185001864894, "data_time": 0.002210259437561035, "grad_norm": 0.1381705090403557, "loss": 0.49135645031929015, "time": 1.8355885744094849, "epoch": 156, "memory": 36824, "step": 48615}
{"lr": 0.0015103331107303613, "data_time": 0.0022733926773071287, "grad_norm": 0.14191448912024499, "loss": 0.48372593224048616, "time": 1.9114308834075928, "epoch": 156, "memory": 36824, "step": 48715}
{"lr": 0.0015098464823467652, "data_time": 0.0030193090438842773, "grad_norm": 0.1366542786359787, "loss": 0.4868148863315582, "time": 1.8311562299728394, "epoch": 156, "memory": 36824, "step": 48815}
{"lr": 0.0015092951023518212, "data_time": 0.311061429977417, "grad_norm": 0.15174573808908462, "loss": 0.4828222543001175, "time": 1.9093662261962892, "epoch": 157, "memory": 36824, "step": 48928}
{"lr": 0.001508805837884706, "data_time": 0.002441072463989258, "grad_norm": 0.14162819534540178, "loss": 0.48500335216522217, "time": 1.8858999490737915, "epoch": 157, "memory": 36824, "step": 49028}
{"lr": 0.0015083153371539156, "data_time": 0.0033514738082885743, "grad_norm": 0.15181687846779823, "loss": 0.48247882425785066, "time": 1.8783491134643555, "epoch": 157, "memory": 36824, "step": 49128}
{"lr": 0.001507759584620864, "data_time": 0.2673605680465698, "grad_norm": 0.13451963365077974, "loss": 0.4865803301334381, "time": 1.9124354124069214, "epoch": 158, "memory": 36824, "step": 49241}
{"lr": 0.0015072664535029428, "data_time": 0.0024463176727294923, "grad_norm": 0.13046964555978774, "loss": 0.4889182984828949, "time": 1.8995587825775146, "epoch": 158, "memory": 36824, "step": 49341}
{"lr": 0.0015067720888062633, "data_time": 0.002746891975402832, "grad_norm": 0.15444734916090966, "loss": 0.491305947303772, "time": 1.9656329154968262, "epoch": 158, "memory": 36824, "step": 49441}
{"lr": 0.001506211973231383, "data_time": 0.14968719482421874, "grad_norm": 0.14936766400933266, "loss": 0.48833903968334197, "time": 1.8314648866653442, "epoch": 159, "memory": 36824, "step": 49554}
{"lr": 0.001505714983888903, "data_time": 0.0024613380432128907, "grad_norm": 0.14881636947393417, "loss": 0.48752264976501464, "time": 1.9103394746780396, "epoch": 159, "memory": 36824, "step": 49654}
{"lr": 0.0015052167636736627, "data_time": 0.0027786970138549806, "grad_norm": 0.14912075474858283, "loss": 0.48836962282657626, "time": 1.8569830417633058, "epoch": 159, "memory": 36824, "step": 49754}
{"lr": 0.0015046522946277824, "data_time": 0.24502394199371338, "grad_norm": 0.1475963443517685, "loss": 0.4886975049972534, "time": 1.8770421743392944, "epoch": 160, "memory": 36824, "step": 49867}
{"lr": 0.001504151455552924, "data_time": 0.0024192333221435547, "grad_norm": 0.14253870770335197, "loss": 0.4885280191898346, "time": 1.9561110734939575, "epoch": 160, "memory": 36824, "step": 49967}
{"lr": 0.0015036493883323317, "data_time": 0.003150320053100586, "grad_norm": 0.17614975795149804, "loss": 0.4831803649663925, "time": 1.8194899082183837, "epoch": 160, "memory": 36824, "step": 50067}
{"lr": 0.0015030805754606684, "data_time": 0.4525080919265747, "grad_norm": 0.1395260848104954, "loss": 0.48909756541252136, "time": 1.8875904560089112, "epoch": 161, "memory": 36824, "step": 50180}
{"lr": 0.0015025758952113871, "data_time": 0.03881561756134033, "grad_norm": 0.13442790135741234, "loss": 0.48557365536689756, "time": 1.9266636371612549, "epoch": 161, "memory": 36824, "step": 50280}
{"lr": 0.001502069989564386, "data_time": 0.002775907516479492, "grad_norm": 0.1484191007912159, "loss": 0.48795682191848755, "time": 1.8814149856567384, "epoch": 161, "memory": 36824, "step": 50380}
{"lr": 0.001501496842586385, "data_time": 0.10377004146575927, "grad_norm": 0.1836984284222126, "loss": 0.48646103739738467, "time": 1.8505286931991578, "epoch": 162, "memory": 36824, "step": 50493}
{"lr": 0.001500988329786276, "data_time": 0.0027456760406494142, "grad_norm": 0.17014296501874923, "loss": 0.48820446729660033, "time": 1.8053860187530517, "epoch": 162, "memory": 36824, "step": 50593}
{"lr": 0.0015004785943573986, "data_time": 0.002742481231689453, "grad_norm": 0.13729844242334366, "loss": 0.4823174953460693, "time": 1.893898320198059, "epoch": 162, "memory": 36824, "step": 50693}
{"lr": 0.0014999011230665588, "data_time": 0.44305384159088135, "grad_norm": 0.14413649812340737, "loss": 0.48543596267700195, "time": 1.8651041507720947, "epoch": 163, "memory": 36824, "step": 50806}
{"lr": 0.0014993887864047008, "data_time": 0.002322077751159668, "grad_norm": 0.1513101913034916, "loss": 0.48435480892658234, "time": 1.8914575815200805, "epoch": 163, "memory": 36824, "step": 50906}
{"lr": 0.001498875229903917, "data_time": 0.002937650680541992, "grad_norm": 0.1564815878868103, "loss": 0.4870688259601593, "time": 1.7957018613815308, "epoch": 163, "memory": 36824, "step": 51006}
{"lr": 0.0014982934441676295, "data_time": 0.276970362663269, "grad_norm": 0.14239834547042846, "loss": 0.4898157864809036, "time": 1.8641616344451903, "epoch": 164, "memory": 36824, "step": 51119}
{"lr": 0.001497777292398443, "data_time": 0.002351212501525879, "grad_norm": 0.15832609310746193, "loss": 0.4871080815792084, "time": 1.9599763870239257, "epoch": 164, "memory": 36824, "step": 51219}
{"lr": 0.0014972599236010213, "data_time": 0.003242349624633789, "grad_norm": 0.14113953858613967, "loss": 0.48610104620456696, "time": 1.8852211713790894, "epoch": 164, "memory": 36824, "step": 51319}
{"lr": 0.0014966738333603991, "data_time": 0.15617902278900148, "grad_norm": 0.1591462180018425, "loss": 0.481986403465271, "time": 1.8510818481445312, "epoch": 165, "memory": 36824, "step": 51432}
{"lr": 0.0014961538753034932, "data_time": 0.0023597240447998046, "grad_norm": 0.13994700983166694, "loss": 0.4870159387588501, "time": 1.8588581562042237, "epoch": 165, "memory": 36824, "step": 51532}
{"lr": 0.001495632703049835, "data_time": 0.0032388925552368163, "grad_norm": 0.15309621021151543, "loss": 0.4881798893213272, "time": 1.917156195640564, "epoch": 165, "memory": 36824, "step": 51632}
{"lr": 0.0014950423183195437, "data_time": 0.3306635856628418, "grad_norm": 0.16660864874720574, "loss": 0.48408391773700715, "time": 1.947831130027771, "epoch": 166, "memory": 36824, "step": 51745}
{"lr": 0.00149451856285957, "data_time": 0.0025951623916625976, "grad_norm": 0.12782456502318382, "loss": 0.484603089094162, "time": 1.8403226852416992, "epoch": 166, "memory": 36824, "step": 51845}
{"lr": 0.00149399359605507, "data_time": 0.0034066200256347655, "grad_norm": 0.1407249979674816, "loss": 0.4872187525033951, "time": 1.8237189292907714, "epoch": 166, "memory": 36824, "step": 51945}
{"lr": 0.0014933989269231573, "data_time": 0.4188942193984985, "grad_norm": 0.13044414520263672, "loss": 0.4844709813594818, "time": 2.052128314971924, "epoch": 167, "memory": 36824, "step": 52058}
{"lr": 0.0014928713830096477, "data_time": 0.0024759769439697266, "grad_norm": 0.18464347571134568, "loss": 0.4866402417421341, "time": 1.912821888923645, "epoch": 167, "memory": 36824, "step": 52158}
{"lr": 0.001492342630624543, "data_time": 0.0027313947677612303, "grad_norm": 0.14475462585687637, "loss": 0.4890262812376022, "time": 1.8492493629455566, "epoch": 167, "memory": 36824, "step": 52258}
{"lr": 0.0014917436872522612, "data_time": 0.129469895362854, "grad_norm": 0.1523721620440483, "loss": 0.4861016243696213, "time": 1.8654488325119019, "epoch": 168, "memory": 36824, "step": 52371}
{"lr": 0.0014912123638994878, "data_time": 0.002599167823791504, "grad_norm": 0.1500331401824951, "loss": 0.48879952132701876, "time": 1.853046989440918, "epoch": 168, "memory": 36824, "step": 52471}
{"lr": 0.001490679834968696, "data_time": 0.0027432680130004884, "grad_norm": 0.14353361427783967, "loss": 0.49027952253818513, "time": 1.8054392099380494, "epoch": 168, "memory": 36824, "step": 52571}
{"lr": 0.0014900766275903362, "data_time": 0.13706281185150146, "grad_norm": 0.15380750522017478, "loss": 0.48445646166801454, "time": 1.9125319719314575, "epoch": 169, "memory": 36824, "step": 52684}
{"lr": 0.0014895415338771478, "data_time": 0.002722358703613281, "grad_norm": 0.1474035955965519, "loss": 0.4909650981426239, "time": 1.8733744382858277, "epoch": 169, "memory": 36824, "step": 52784}
{"lr": 0.001489005237500119, "data_time": 0.0027782201766967775, "grad_norm": 0.17870063930749894, "loss": 0.4825774073600769, "time": 1.806381916999817, "epoch": 169, "memory": 36824, "step": 52884}
{"lr": 0.0014883977764228304, "data_time": 0.061556243896484376, "grad_norm": 0.14018207415938377, "loss": 0.48085257709026336, "time": 1.9086678743362426, "epoch": 170, "memory": 36824, "step": 52997}
{"lr": 0.0014878589214925024, "data_time": 0.0022414207458496095, "grad_norm": 0.15724752098321915, "loss": 0.4858349531888962, "time": 2.030556559562683, "epoch": 170, "memory": 36824, "step": 53097}
{"lr": 0.0014873188668330648, "data_time": 0.0033245086669921875, "grad_norm": 0.15612386614084245, "loss": 0.48816308081150056, "time": 1.9594725370407104, "epoch": 170, "memory": 36824, "step": 53197}
{"lr": 0.0014867071624366792, "data_time": 0.22293598651885987, "grad_norm": 0.13421955183148385, "loss": 0.4874359667301178, "time": 1.9081759214401246, "epoch": 171, "memory": 36824, "step": 53310}
{"lr": 0.001486164555496756, "data_time": 0.0024654150009155275, "grad_norm": 0.14145127981901168, "loss": 0.48774695098400117, "time": 1.8662558317184448, "epoch": 171, "memory": 36824, "step": 53410}
{"lr": 0.0014856207517829493, "data_time": 0.0031776666641235352, "grad_norm": 0.1476552218198776, "loss": 0.4837233334779739, "time": 1.9336641073226928, "epoch": 171, "memory": 36824, "step": 53510}
{"lr": 0.0014850048145198102, "data_time": 0.053430366516113284, "grad_norm": 0.14909310042858123, "loss": 0.48909271955490113, "time": 1.8669921159744263, "epoch": 172, "memory": 36824, "step": 53623}
{"lr": 0.0014844584648419487, "data_time": 0.0022326946258544923, "grad_norm": 0.16531476080417634, "loss": 0.48799607157707214, "time": 1.8347581386566163, "epoch": 172, "memory": 36824, "step": 53723}
{"lr": 0.001483910921365881, "data_time": 0.0028212547302246095, "grad_norm": 0.16255063861608504, "loss": 0.4827022671699524, "time": 2.041848921775818, "epoch": 172, "memory": 36824, "step": 53823}
{"lr": 0.0014832907617606558, "data_time": 0.3035320520401001, "grad_norm": 0.1528352439403534, "loss": 0.48985946774482725, "time": 1.879862666130066, "epoch": 173, "memory": 36824, "step": 53936}
{"lr": 0.0014827406786804631, "data_time": 0.002401137351989746, "grad_norm": 0.15199998542666435, "loss": 0.48772897124290465, "time": 1.851313042640686, "epoch": 173, "memory": 36824, "step": 54036}
{"lr": 0.0014821894047981415, "data_time": 0.0028124570846557615, "grad_norm": 0.1690162993967533, "loss": 0.4836514234542847, "time": 1.8292348623275756, "epoch": 173, "memory": 36824, "step": 54136}
{"lr": 0.0014815650334476466, "data_time": 0.22175328731536864, "grad_norm": 0.15805824622511863, "loss": 0.4855764538049698, "time": 1.866059994697571, "epoch": 174, "memory": 36824, "step": 54249}
{"lr": 0.0014810112263645252, "data_time": 0.0023001432418823242, "grad_norm": 0.1516730546951294, "loss": 0.48669466078281404, "time": 1.8431983947753907, "epoch": 174, "memory": 36824, "step": 54349}
{"lr": 0.001480456231495698, "data_time": 0.003189969062805176, "grad_norm": 0.15077462866902352, "loss": 0.4839086949825287, "time": 2.272278594970703, "epoch": 174, "memory": 36824, "step": 54449}
{"lr": 0.0014798276590687173, "data_time": 0.22520780563354492, "grad_norm": 0.15752134770154952, "loss": 0.49187812507152556, "time": 1.9521388053894042, "epoch": 175, "memory": 36824, "step": 54562}
{"lr": 0.001479270137445701, "data_time": 0.0024306058883666994, "grad_norm": 0.142337179929018, "loss": 0.48512808680534364, "time": 1.8590705394744873, "epoch": 175, "memory": 36824, "step": 54662}
{"lr": 0.0014787114310737023, "data_time": 0.002941799163818359, "grad_norm": 0.15851011723279954, "loss": 0.48587273359298705, "time": 1.9103439807891847, "epoch": 175, "memory": 36824, "step": 54762}
{"lr": 0.0014780786683108028, "data_time": 0.01818268299102783, "grad_norm": 0.1342037223279476, "loss": 0.484595912694931, "time": 1.9424433708190918, "epoch": 176, "memory": 36824, "step": 54875}
{"lr": 0.0014775174416743993, "data_time": 0.002385997772216797, "grad_norm": 0.14368339627981186, "loss": 0.48866083323955534, "time": 1.8561405658721923, "epoch": 176, "memory": 36824, "step": 54975}
{"lr": 0.001476955033345977, "data_time": 0.002657651901245117, "grad_norm": 0.14778063893318177, "loss": 0.4847827136516571, "time": 1.9442948341369628, "epoch": 176, "memory": 36824, "step": 55075}
{"lr": 0.0014763180910593303, "data_time": 0.19235050678253174, "grad_norm": 0.14382144287228585, "loss": 0.4862242370843887, "time": 1.8230872392654418, "epoch": 177, "memory": 36824, "step": 55188}
{"lr": 0.0014757531689993547, "data_time": 0.002316451072692871, "grad_norm": 0.15882713943719864, "loss": 0.48702003359794616, "time": 1.8682417154312134, "epoch": 177, "memory": 36824, "step": 55288}
{"lr": 0.001475187068324514, "data_time": 0.0025516033172607424, "grad_norm": 0.13799122720956802, "loss": 0.4864581197500229, "time": 1.732729721069336, "epoch": 177, "memory": 36824, "step": 55388}
{"lr": 0.0014745459573977059, "data_time": 0.0024863481521606445, "grad_norm": 0.16816541329026222, "loss": 0.4847221314907074, "time": 1.8465152978897095, "epoch": 178, "memory": 36824, "step": 55501}
{"lr": 0.0014739773495671155, "data_time": 0.0026376962661743162, "grad_norm": 0.13979245200753213, "loss": 0.48942471146583555, "time": 1.8814806461334228, "epoch": 178, "memory": 36824, "step": 55601}
{"lr": 0.0014734075662189582, "data_time": 0.0027558565139770507, "grad_norm": 0.13751528337597846, "loss": 0.4827274203300476, "time": 1.8844356536865234, "epoch": 178, "memory": 36824, "step": 55701}
{"lr": 0.0014727622976068045, "data_time": 0.002366471290588379, "grad_norm": 0.16393957585096358, "loss": 0.48404603600502016, "time": 1.8770650386810304, "epoch": 179, "memory": 36824, "step": 55814}
{"lr": 0.0014721900137215408, "data_time": 0.002447843551635742, "grad_norm": 0.14539702460169793, "loss": 0.4881973505020142, "time": 1.8612702369689942, "epoch": 179, "memory": 36824, "step": 55914}
{"lr": 0.0014716165574360889, "data_time": 0.002723503112792969, "grad_norm": 0.15376784056425094, "loss": 0.486798095703125, "time": 1.8886102199554444, "epoch": 179, "memory": 36824, "step": 56014}
{"lr": 0.001470967142164452, "data_time": 0.09411799907684326, "grad_norm": 0.15792885720729827, "loss": 0.48474317491054536, "time": 1.9020081281661987, "epoch": 180, "memory": 36824, "step": 56127}
{"lr": 0.0014703911920032656, "data_time": 0.0024544954299926757, "grad_norm": 0.15226852148771286, "loss": 0.48379501700401306, "time": 2.2387398719787597, "epoch": 180, "memory": 36824, "step": 56227}
{"lr": 0.0014698140725793087, "data_time": 0.0027638912200927735, "grad_norm": 0.17239070013165475, "loss": 0.4851881891489029, "time": 1.8401047229766845, "epoch": 180, "memory": 36824, "step": 56327}
{"lr": 0.0014691605217449029, "data_time": 0.13305480480194093, "grad_norm": 0.12714398577809333, "loss": 0.48649562895298004, "time": 1.877639365196228, "epoch": 181, "memory": 36824, "step": 56440}
{"lr": 0.0014685809151491938, "data_time": 0.0024099111557006835, "grad_norm": 0.162714172154665, "loss": 0.48991506099700927, "time": 1.8585659503936767, "epoch": 181, "memory": 36824, "step": 56540}
{"lr": 0.0014680001424481075, "data_time": 0.002737116813659668, "grad_norm": 0.1398825727403164, "loss": 0.4902449697256088, "time": 1.7928276300430297, "epoch": 181, "memory": 36824, "step": 56640}
{"lr": 0.0014673424672183117, "data_time": 0.40845017433166503, "grad_norm": 0.138978760689497, "loss": 0.484792959690094, "time": 1.868248701095581, "epoch": 182, "memory": 36824, "step": 56753}
{"lr": 0.0014667592140919566, "data_time": 0.0025154829025268556, "grad_norm": 0.16285373792052268, "loss": 0.48669285476207735, "time": 1.8131901502609253, "epoch": 182, "memory": 36824, "step": 56853}
{"lr": 0.0014661747980375442, "data_time": 0.0029250383377075195, "grad_norm": 0.15599460899829865, "loss": 0.4853589802980423, "time": 1.8513255357742309, "epoch": 182, "memory": 36824, "step": 56953}
{"lr": 0.0014655130096502148, "data_time": 0.16776580810546876, "grad_norm": 0.13655056804418564, "loss": 0.4817366093397141, "time": 1.8179000616073608, "epoch": 183, "memory": 36824, "step": 57066}
{"lr": 0.0014649261199594012, "data_time": 0.0023342132568359374, "grad_norm": 0.14231911599636077, "loss": 0.49059575200080874, "time": 1.9300914764404298, "epoch": 183, "memory": 36824, "step": 57166}
{"lr": 0.0014643380705377214, "data_time": 0.0027247190475463865, "grad_norm": 0.15390681996941566, "loss": 0.48462929129600524, "time": 1.8432610273361205, "epoch": 183, "memory": 36824, "step": 57266}
{"lr": 0.0014636721803009975, "data_time": 0.3749950647354126, "grad_norm": 0.15973949506878854, "loss": 0.4891898274421692, "time": 1.8803874254226685, "epoch": 184, "memory": 36824, "step": 57379}
{"lr": 0.0014630816640740505, "data_time": 0.0023424386978149413, "grad_norm": 0.14169808030128478, "loss": 0.48466921746730807, "time": 1.9075021266937255, "epoch": 184, "memory": 36824, "step": 57479}
{"lr": 0.0014624899913332449, "data_time": 0.0034915447235107423, "grad_norm": 0.13749078884720803, "loss": 0.4846741884946823, "time": 1.9107624530792235, "epoch": 184, "memory": 36824, "step": 57579}
{"lr": 0.0014618200106253521, "data_time": 0.0022725820541381835, "grad_norm": 0.15368260443210602, "loss": 0.4876262456178665, "time": 1.832480525970459, "epoch": 185, "memory": 36824, "step": 57692}
{"lr": 0.0014612258779525661, "data_time": 0.002398514747619629, "grad_norm": 0.15464744493365287, "loss": 0.48827539682388305, "time": 1.9013987064361573, "epoch": 185, "memory": 36824, "step": 57792}
{"lr": 0.001460630592002691, "data_time": 0.0027945280075073243, "grad_norm": 0.150618214905262, "loss": 0.48412611782550813, "time": 1.7616206407546997, "epoch": 185, "memory": 36824, "step": 57892}
{"lr": 0.0014599565322717504, "data_time": 0.3593390703201294, "grad_norm": 0.16168414801359177, "loss": 0.49061791598796844, "time": 2.0801613569259643, "epoch": 186, "memory": 36824, "step": 58005}
{"lr": 0.0014593587933052123, "data_time": 0.0024298667907714845, "grad_norm": 0.14330110102891921, "loss": 0.4885875850915909, "time": 1.866189670562744, "epoch": 186, "memory": 36824, "step": 58105}
{"lr": 0.0014587599043180616, "data_time": 0.0032709360122680662, "grad_norm": 0.1650798961520195, "loss": 0.4899137049913406, "time": 1.8555349826812744, "epoch": 186, "memory": 36824, "step": 58205}
{"lr": 0.0014580817770818936, "data_time": 0.002682924270629883, "grad_norm": 0.15126209259033202, "loss": 0.4864267736673355, "time": 1.908173179626465, "epoch": 187, "memory": 36824, "step": 58318}
{"lr": 0.0014574804420353134, "data_time": 0.0023089170455932615, "grad_norm": 0.16198250651359558, "loss": 0.4861677408218384, "time": 1.8311825513839721, "epoch": 187, "memory": 36824, "step": 58418}
{"lr": 0.0014568779602442455, "data_time": 0.0033509254455566405, "grad_norm": 0.14880782067775727, "loss": 0.48817767202854156, "time": 1.8805256128311156, "epoch": 187, "memory": 36824, "step": 58518}
{"lr": 0.0014561957770901758, "data_time": 0.29804823398590086, "grad_norm": 0.1267651081085205, "loss": 0.4862898588180542, "time": 1.9123597383499145, "epoch": 188, "memory": 36824, "step": 58631}
{"lr": 0.0014555908562387105, "data_time": 0.002490806579589844, "grad_norm": 0.1458483785390854, "loss": 0.4873624384403229, "time": 1.8977587223052979, "epoch": 188, "memory": 36824, "step": 58731}
{"lr": 0.0014549847919384786, "data_time": 0.0030151128768920897, "grad_norm": 0.16501371711492538, "loss": 0.4872118979692459, "time": 1.854216504096985, "epoch": 188, "memory": 36824, "step": 58831}
{"lr": 0.001454298564523135, "data_time": 0.23140809535980225, "grad_norm": 0.1432741329073906, "loss": 0.4835095077753067, "time": 2.0680991649627685, "epoch": 189, "memory": 36824, "step": 58944}
{"lr": 0.0014536900682032131, "data_time": 0.0022789716720581056, "grad_norm": 0.12814096584916115, "loss": 0.48875128030776976, "time": 2.1163424491882323, "epoch": 189, "memory": 36824, "step": 59044}
{"lr": 0.0014530804317497838, "data_time": 0.003326106071472168, "grad_norm": 0.15903731435537338, "loss": 0.4864784687757492, "time": 1.7934677362442017, "epoch": 189, "memory": 36824, "step": 59144}
{"lr": 0.0014523901717988965, "data_time": 0.09206063747406006, "grad_norm": 0.17093401551246643, "loss": 0.48638715744018557, "time": 1.885124135017395, "epoch": 190, "memory": 36824, "step": 59257}
{"lr": 0.0014517781104080416, "data_time": 0.0025682687759399415, "grad_norm": 0.17037067264318467, "loss": 0.48333775997161865, "time": 1.8717389822006225, "epoch": 190, "memory": 36824, "step": 59357}
{"lr": 0.00145116491221842, "data_time": 0.0029785633087158203, "grad_norm": 0.14788855314254762, "loss": 0.48318021297454833, "time": 1.8264184474945069, "epoch": 190, "memory": 36824, "step": 59457}
{"lr": 0.0014504706315266287, "data_time": 0.39921548366546633, "grad_norm": 0.13809559121727943, "loss": 0.48473828136920927, "time": 2.2740922451019285, "epoch": 191, "memory": 36824, "step": 59570}
{"lr": 0.0014498550155232796, "data_time": 0.0024083614349365234, "grad_norm": 0.17309195548295975, "loss": 0.4877866327762604, "time": 1.8718624353408813, "epoch": 191, "memory": 36824, "step": 59670}
{"lr": 0.0014492382660753304, "data_time": 0.0032575368881225587, "grad_norm": 0.14026251062750816, "loss": 0.4866267412900925, "time": 2.2333248615264893, "epoch": 191, "memory": 36824, "step": 59770}
{"lr": 0.001448539976505978, "data_time": 0.02023801803588867, "grad_norm": 0.15268535763025284, "loss": 0.48913065195083616, "time": 1.8710049152374268, "epoch": 192, "memory": 36824, "step": 59883}
{"lr": 0.001447920816409314, "data_time": 0.0027248144149780275, "grad_norm": 0.1438474588096142, "loss": 0.48488832712173463, "time": 1.9298730134963988, "epoch": 192, "memory": 36824, "step": 59983}
{"lr": 0.0014473005262415823, "data_time": 0.003396463394165039, "grad_norm": 0.14431367963552474, "loss": 0.4897635579109192, "time": 1.8806206703186035, "epoch": 192, "memory": 36824, "step": 60083}
{"lr": 0.001446598239726509, "data_time": 0.36615376472473143, "grad_norm": 0.14499332904815673, "loss": 0.48312994837760925, "time": 1.849896788597107, "epoch": 193, "memory": 36824, "step": 60196}
{"lr": 0.001445975546116268, "data_time": 0.002516484260559082, "grad_norm": 0.14975243657827378, "loss": 0.4903783440589905, "time": 1.868075394630432, "epoch": 193, "memory": 36824, "step": 60296}
{"lr": 0.0014453517258278042, "data_time": 0.00339663028717041, "grad_norm": 0.14501312971115113, "loss": 0.4869555324316025, "time": 1.8741598844528198, "epoch": 193, "memory": 36824, "step": 60396}
{"lr": 0.0014446454543671481, "data_time": 0.253902006149292, "grad_norm": 0.16316679418087005, "loss": 0.4809677630662918, "time": 1.8522069692611693, "epoch": 194, "memory": 36824, "step": 60509}
{"lr": 0.0014440192378834473, "data_time": 0.0024646759033203126, "grad_norm": 0.15979356393218042, "loss": 0.48289046585559847, "time": 1.8411011934280395, "epoch": 194, "memory": 36824, "step": 60609}
{"lr": 0.0014433918981336172, "data_time": 0.0029190540313720702, "grad_norm": 0.1605844810605049, "loss": 0.48566803634166716, "time": 1.8360018014907837, "epoch": 194, "memory": 36824, "step": 60709}
{"lr": 0.0014426816537956109, "data_time": 0.20425751209259033, "grad_norm": 0.1489625595510006, "loss": 0.4821602314710617, "time": 1.8641482591629028, "epoch": 195, "memory": 36824, "step": 60822}
{"lr": 0.0014420519251387616, "data_time": 0.0025043487548828125, "grad_norm": 0.16701146513223647, "loss": 0.48373163044452666, "time": 1.8996408700942993, "epoch": 195, "memory": 36824, "step": 60922}
{"lr": 0.001441421076647072, "data_time": 0.002774667739868164, "grad_norm": 0.14239876717329025, "loss": 0.48274402022361756, "time": 1.8960981130599976, "epoch": 195, "memory": 36824, "step": 61022}
{"lr": 0.0014407068715678317, "data_time": 0.07721543312072754, "grad_norm": 0.14046293795108794, "loss": 0.48407929539680483, "time": 1.8581570863723755, "epoch": 196, "memory": 36824, "step": 61135}
{"lr": 0.0014400736414981572, "data_time": 0.002506875991821289, "grad_norm": 0.16784363985061646, "loss": 0.48207263052463534, "time": 1.865582323074341, "epoch": 196, "memory": 36824, "step": 61235}
{"lr": 0.0014394392950440654, "data_time": 0.0029201745986938477, "grad_norm": 0.13449205681681634, "loss": 0.4829797625541687, "time": 1.8753120183944703, "epoch": 196, "memory": 36824, "step": 61335}
{"lr": 0.0014387211414273844, "data_time": 0.19948697090148926, "grad_norm": 0.14860331416130065, "loss": 0.4869536519050598, "time": 1.9157105445861817, "epoch": 197, "memory": 36824, "step": 61448}
{"lr": 0.001438084420765041, "data_time": 0.0024996280670166017, "grad_norm": 0.16125691682100296, "loss": 0.4853223621845245, "time": 1.9578809499740601, "epoch": 197, "memory": 36824, "step": 61548}
{"lr": 0.0014374465871877793, "data_time": 0.0033057212829589845, "grad_norm": 0.14965041726827621, "loss": 0.48567060232162473, "time": 1.8747182369232178, "epoch": 197, "memory": 36824, "step": 61648}
{"lr": 0.0014367244973049224, "data_time": 0.16634886264801024, "grad_norm": 0.15577004551887513, "loss": 0.4841023743152618, "time": 1.8438978672027588, "epoch": 198, "memory": 36824, "step": 61761}
{"lr": 0.0014360842969297085, "data_time": 0.002382636070251465, "grad_norm": 0.1505852833390236, "loss": 0.48700003921985624, "time": 1.862695002555847, "epoch": 198, "memory": 36824, "step": 61861}
{"lr": 0.001435442987128093, "data_time": 0.0026389360427856445, "grad_norm": 0.1470955565571785, "loss": 0.4851991355419159, "time": 1.9214387655258178, "epoch": 198, "memory": 36824, "step": 61961}
{"lr": 0.0014347169733175812, "data_time": 0.1649153709411621, "grad_norm": 0.15159436985850333, "loss": 0.4846257209777832, "time": 1.8569815158843994, "epoch": 199, "memory": 36824, "step": 62074}
{"lr": 0.0014340733041687567, "data_time": 0.0025992631912231446, "grad_norm": 0.16031184941530227, "loss": 0.4867650121450424, "time": 1.8770628929138184, "epoch": 199, "memory": 36824, "step": 62174}
{"lr": 0.0014334285291010033, "data_time": 0.003423762321472168, "grad_norm": 0.13855483904480934, "loss": 0.48541157841682436, "time": 1.8240908145904542, "epoch": 199, "memory": 36824, "step": 62274}
{"lr": 0.0014326986037684136, "data_time": 0.22539880275726318, "grad_norm": 0.1566064976155758, "loss": 0.4879933506250381, "time": 1.87759428024292, "epoch": 200, "memory": 36824, "step": 62387}
{"lr": 0.0014320514768445076, "data_time": 0.0024957895278930665, "grad_norm": 0.13986362591385842, "loss": 0.4846964985132217, "time": 1.858044481277466, "epoch": 200, "memory": 36824, "step": 62487}
{"lr": 0.0014314032475280418, "data_time": 0.002944684028625488, "grad_norm": 0.14635549187660218, "loss": 0.4832313507795334, "time": 1.8744933128356933, "epoch": 200, "memory": 36824, "step": 62587}
{"lr": 0.00143066942314578, "data_time": 0.35889592170715334, "grad_norm": 0.1575212448835373, "loss": 0.4848070055246353, "time": 1.8696979761123658, "epoch": 201, "memory": 36824, "step": 62700}
{"lr": 0.0014300188495044055, "data_time": 0.0024942398071289063, "grad_norm": 0.15593077391386032, "loss": 0.4839187353849411, "time": 1.8818164110183715, "epoch": 201, "memory": 36824, "step": 62800}
{"lr": 0.0014293671770156788, "data_time": 0.0033396482467651367, "grad_norm": 0.17827295437455176, "loss": 0.4811025232076645, "time": 1.8165067911148072, "epoch": 201, "memory": 36824, "step": 62900}
{"lr": 0.001428629466122782, "data_time": 0.42269039154052734, "grad_norm": 0.15911196917295456, "loss": 0.4816774368286133, "time": 1.939547848701477, "epoch": 202, "memory": 36824, "step": 63013}
{"lr": 0.0014279754568804466, "data_time": 0.0023840904235839845, "grad_norm": 0.15428945496678353, "loss": 0.4857001543045044, "time": 1.899057960510254, "epoch": 202, "memory": 36824, "step": 63113}
{"lr": 0.0014273203523547408, "data_time": 0.003395676612854004, "grad_norm": 0.1387595422565937, "loss": 0.48469198644161227, "time": 1.8192869424819946, "epoch": 202, "memory": 36824, "step": 63213}
{"lr": 0.0014265787675566514, "data_time": 0.39151949882507325, "grad_norm": 0.15755331069231032, "loss": 0.4862195521593094, "time": 2.14905960559845, "epoch": 203, "memory": 36824, "step": 63326}
{"lr": 0.0014259213338885697, "data_time": 0.002566409111022949, "grad_norm": 0.14470362886786461, "loss": 0.4779351383447647, "time": 1.8657771348953247, "epoch": 203, "memory": 36824, "step": 63426}
{"lr": 0.0014252628085198189, "data_time": 0.0033065319061279298, "grad_norm": 0.14007729664444923, "loss": 0.4826598525047302, "time": 1.8648659229278564, "epoch": 203, "memory": 36824, "step": 63526}
{"lr": 0.0014245173624881766, "data_time": 0.21852200031280516, "grad_norm": 0.1627061150968075, "loss": 0.47628743648529054, "time": 1.8871947288513184, "epoch": 204, "memory": 36824, "step": 63639}
{"lr": 0.0014238565156280767, "data_time": 0.002563953399658203, "grad_norm": 0.14632503539323807, "loss": 0.48813999593257906, "time": 1.9190196990966797, "epoch": 204, "memory": 36824, "step": 63739}
{"lr": 0.0014231945806686603, "data_time": 0.0036120176315307616, "grad_norm": 0.1487612709403038, "loss": 0.4888307213783264, "time": 1.853407049179077, "epoch": 204, "memory": 36824, "step": 63839}
{"lr": 0.0014224452861410798, "data_time": 0.32183635234832764, "grad_norm": 0.14260665848851203, "loss": 0.4839299887418747, "time": 2.0930901050567625, "epoch": 205, "memory": 36824, "step": 63952}
{"lr": 0.0014217810373810106, "data_time": 0.0024753570556640624, "grad_norm": 0.1581368066370487, "loss": 0.4840857684612274, "time": 1.8536711931228638, "epoch": 205, "memory": 36824, "step": 64052}
{"lr": 0.0014211157041415707, "data_time": 0.003840947151184082, "grad_norm": 0.1445240430533886, "loss": 0.4793038696050644, "time": 1.8544978857040406, "epoch": 205, "memory": 36824, "step": 64152}
{"lr": 0.0014203625739214302, "data_time": 0.3051112413406372, "grad_norm": 0.14256088808178902, "loss": 0.48438647091388704, "time": 1.8844319343566895, "epoch": 206, "memory": 36824, "step": 64265}
{"lr": 0.0014196949346115706, "data_time": 0.0025198936462402345, "grad_norm": 0.1511609099805355, "loss": 0.4862354457378387, "time": 1.834150242805481, "epoch": 206, "memory": 36824, "step": 64365}
{"lr": 0.001419026214460816, "data_time": 0.0035213470458984376, "grad_norm": 0.14111485406756402, "loss": 0.4872849702835083, "time": 1.8691346645355225, "epoch": 206, "memory": 36824, "step": 64465}
{"lr": 0.0014182692614170308, "data_time": 0.2194589614868164, "grad_norm": 0.15536224097013474, "loss": 0.48268712162971494, "time": 1.890639638900757, "epoch": 207, "memory": 36824, "step": 64578}
{"lr": 0.0014175982429654962, "data_time": 0.002442002296447754, "grad_norm": 0.16373848617076875, "loss": 0.47921618819236755, "time": 1.868643856048584, "epoch": 207, "memory": 36824, "step": 64678}
{"lr": 0.001416926147330009, "data_time": 0.0032943248748779296, "grad_norm": 0.13764430657029153, "loss": 0.4819365471601486, "time": 1.8552719831466675, "epoch": 207, "memory": 36824, "step": 64778}
{"lr": 0.0014161653843968159, "data_time": 0.0847428560256958, "grad_norm": 0.13644906431436538, "loss": 0.4795131325721741, "time": 1.8759557247161864, "epoch": 208, "memory": 36824, "step": 64891}
{"lr": 0.001415490998269457, "data_time": 0.0025924921035766603, "grad_norm": 0.1401400998234749, "loss": 0.48496178686618807, "time": 1.9197844743728638, "epoch": 208, "memory": 36824, "step": 64991}
{"lr": 0.0014148155386334972, "data_time": 0.003953003883361816, "grad_norm": 0.14653465747833253, "loss": 0.48353088796138766, "time": 1.8420462131500244, "epoch": 208, "memory": 36824, "step": 65091}
{"lr": 0.0014140509788102395, "data_time": 0.38791608810424805, "grad_norm": 0.15984534472227097, "loss": 0.4850501328706741, "time": 1.835355854034424, "epoch": 209, "memory": 36824, "step": 65204}
{"lr": 0.0014133732365304572, "data_time": 0.02080724239349365, "grad_norm": Infinity, "loss": 0.48936277329921724, "time": 1.8645618677139282, "epoch": 209, "memory": 36824, "step": 65304}
{"lr": 0.0014126944244357668, "data_time": 0.003708076477050781, "grad_norm": 0.13069095984101295, "loss": 0.48674664795398714, "time": 1.8775474309921265, "epoch": 209, "memory": 36824, "step": 65404}
{"lr": 0.0014119260807866608, "data_time": 0.3054033279418945, "grad_norm": 0.14162577986717223, "loss": 0.48461097180843354, "time": 1.8342324018478393, "epoch": 210, "memory": 36824, "step": 65517}
{"lr": 0.0014112449939351964, "data_time": 0.0025049686431884766, "grad_norm": 0.13492735102772713, "loss": 0.4863227427005768, "time": 1.8349132537841797, "epoch": 210, "memory": 36824, "step": 65617}
{"lr": 0.0014105628409808004, "data_time": 0.0029002666473388673, "grad_norm": 0.1428115874528885, "loss": 0.48163354098796846, "time": 1.834232211112976, "epoch": 210, "memory": 36824, "step": 65717}
{"lr": 0.0014097907266347194, "data_time": 0.00253143310546875, "grad_norm": 0.15854293256998062, "loss": 0.4823582828044891, "time": 1.8327012300491332, "epoch": 211, "memory": 36824, "step": 65830}
{"lr": 0.001409106306849472, "data_time": 0.0025033473968505858, "grad_norm": 0.14700784012675286, "loss": 0.48085759580135345, "time": 1.8157867193222046, "epoch": 211, "memory": 36824, "step": 65930}
{"lr": 0.001408420824691483, "data_time": 0.0028459787368774413, "grad_norm": 0.15229340866208077, "loss": 0.4819756090641022, "time": 1.8220194101333618, "epoch": 211, "memory": 36824, "step": 66030}
{"lr": 0.0014076449528417291, "data_time": 0.08613040447235107, "grad_norm": 0.15146972686052323, "loss": 0.48933247923851014, "time": 1.8384586334228517, "epoch": 212, "memory": 36824, "step": 66143}
{"lr": 0.0014069572118175445, "data_time": 0.0023810148239135744, "grad_norm": 0.1590214803814888, "loss": 0.4868337631225586, "time": 1.801504135131836, "epoch": 212, "memory": 36824, "step": 66243}
{"lr": 0.0014062684121689613, "data_time": 0.0033679962158203124, "grad_norm": 0.14724015966057777, "loss": 0.48294111490249636, "time": 1.8525300979614259, "epoch": 212, "memory": 36824, "step": 66343}
{"lr": 0.001405488796073046, "data_time": 0.04494397640228272, "grad_norm": 0.1364560067653656, "loss": 0.4795979380607605, "time": 1.8465234518051148, "epoch": 213, "memory": 36824, "step": 66456}
{"lr": 0.001404797745561518, "data_time": 0.0025672435760498045, "grad_norm": 0.18448254242539405, "loss": 0.48500230312347414, "time": 1.8083454370498657, "epoch": 213, "memory": 36824, "step": 66556}
{"lr": 0.0014041056401920269, "data_time": 0.0033205270767211912, "grad_norm": 0.1653960794210434, "loss": 0.4829452455043793, "time": 1.8514568090438843, "epoch": 213, "memory": 36824, "step": 66656}
{"lr": 0.0014033222931714377, "data_time": 0.2979900598526001, "grad_norm": 0.14148368090391159, "loss": 0.4832250952720642, "time": 1.8818673372268677, "epoch": 214, "memory": 36824, "step": 66769}
{"lr": 0.0014026279449807146, "data_time": 0.0023061275482177735, "grad_norm": 0.1518888033926487, "loss": 0.4916439950466156, "time": 1.8566459894180298, "epoch": 214, "memory": 36824, "step": 66869}
{"lr": 0.0014019325457164846, "data_time": 0.0037925004959106444, "grad_norm": 0.14559702724218368, "loss": 0.4797347098588943, "time": 1.7696943044662476, "epoch": 214, "memory": 36824, "step": 66969}
{"lr": 0.0014011454811564603, "data_time": 0.20438988208770753, "grad_norm": 0.15006668046116828, "loss": 0.4877879947423935, "time": 1.8871328830718994, "epoch": 215, "memory": 36824, "step": 67082}
{"lr": 0.0014004478471510368, "data_time": 0.0025572776794433594, "grad_norm": 0.16299215257167815, "loss": 0.4869422048330307, "time": 1.874497365951538, "epoch": 215, "memory": 36824, "step": 67182}
{"lr": 0.0013997491658745253, "data_time": 0.0034240961074829103, "grad_norm": 0.15064238756895065, "loss": 0.486334490776062, "time": 1.9791069507598877, "epoch": 215, "memory": 36824, "step": 67282}
{"lr": 0.0013989583972238294, "data_time": 0.23099005222320557, "grad_norm": 0.16193856596946715, "loss": 0.4847529172897339, "time": 1.9024526834487916, "epoch": 216, "memory": 36824, "step": 67395}
{"lr": 0.001398257489324346, "data_time": 0.0024376392364501955, "grad_norm": 0.1754717171192169, "loss": 0.4828225553035736, "time": 1.8180989503860474, "epoch": 216, "memory": 36824, "step": 67495}
{"lr": 0.0013975555379740882, "data_time": 0.002742362022399902, "grad_norm": 0.1996553860604763, "loss": 0.4827772915363312, "time": 1.8603077411651612, "epoch": 216, "memory": 36824, "step": 67595}
{"lr": 0.001396761078744773, "data_time": 0.0023429393768310547, "grad_norm": 0.14914307817816735, "loss": 0.4838524341583252, "time": 1.8361252069473266, "epoch": 217, "memory": 36824, "step": 67708}
{"lr": 0.0013960569089278116, "data_time": 0.002580070495605469, "grad_norm": 0.20731363520026208, "loss": 0.482020965218544, "time": 1.8380635261535645, "epoch": 217, "memory": 36824, "step": 67808}
{"lr": 0.001395351699498217, "data_time": 0.0033847332000732423, "grad_norm": 0.16140511259436607, "loss": 0.4874441742897034, "time": 1.824706530570984, "epoch": 217, "memory": 36824, "step": 67908}
{"lr": 0.0013945535632654002, "data_time": 0.0023729562759399413, "grad_norm": 0.1572107896208763, "loss": 0.48739710450172424, "time": 1.876958155632019, "epoch": 218, "memory": 36824, "step": 68021}
{"lr": 0.0013938461435632815, "data_time": 0.002726912498474121, "grad_norm": 0.16140828132629395, "loss": 0.488601878285408, "time": 1.8865764617919922, "epoch": 218, "memory": 36824, "step": 68121}
{"lr": 0.001393137688104433, "data_time": 0.0032928466796875, "grad_norm": 0.14194570630788803, "loss": 0.4842346966266632, "time": 1.8478269815444945, "epoch": 218, "memory": 36824, "step": 68221}
{"lr": 0.0013923358885060597, "data_time": 0.03546600341796875, "grad_norm": 0.1546568140387535, "loss": 0.4854838877916336, "time": 1.8694029808044434, "epoch": 219, "memory": 36824, "step": 68334}
{"lr": 0.0013916252310066314, "data_time": 0.0026097536087036134, "grad_norm": 0.16301179826259612, "loss": 0.482526969909668, "time": 1.919849419593811, "epoch": 219, "memory": 36824, "step": 68434}
{"lr": 0.0013909135416240814, "data_time": 0.0032873630523681642, "grad_norm": 0.17744024842977524, "loss": 0.48155671954154966, "time": 1.8338196754455567, "epoch": 219, "memory": 36824, "step": 68534}
{"lr": 0.0013901080923606963, "data_time": 0.19890310764312744, "grad_norm": 0.17067791670560836, "loss": 0.48100591599941256, "time": 1.8861464023590089, "epoch": 220, "memory": 36824, "step": 68647}
{"lr": 0.0013893942092071383, "data_time": 0.027437567710876465, "grad_norm": 0.16372552514076233, "loss": 0.48684785068035125, "time": 2.223517155647278, "epoch": 220, "memory": 36824, "step": 68747}
{"lr": 0.0013886792980616922, "data_time": 0.0034654855728149412, "grad_norm": 0.15802869200706482, "loss": 0.4853780925273895, "time": 1.860045027732849, "epoch": 220, "memory": 36824, "step": 68847}
{"lr": 0.0013878702128961979, "data_time": 0.10243692398071289, "grad_norm": 0.1543609917163849, "loss": 0.48374001383781434, "time": 1.836736273765564, "epoch": 221, "memory": 36824, "step": 68960}
{"lr": 0.0013871531162868006, "data_time": 0.0028914213180541992, "grad_norm": 0.1608805388212204, "loss": 0.482477656006813, "time": 1.890627098083496, "epoch": 221, "memory": 36824, "step": 69060}
{"lr": 0.0013864349955943181, "data_time": 0.0028717517852783203, "grad_norm": 0.14772227108478547, "loss": 0.4858135163784027, "time": 1.905895471572876, "epoch": 221, "memory": 36824, "step": 69160}
{"lr": 0.00138562228835175, "data_time": 0.06454312801361084, "grad_norm": 0.15071311891078948, "loss": 0.48702488243579867, "time": 1.943054485321045, "epoch": 222, "memory": 36824, "step": 69273}
{"lr": 0.001384901990539717, "data_time": 0.002437734603881836, "grad_norm": 0.15180661380290986, "loss": 0.4842871963977814, "time": 1.7697608947753907, "epoch": 222, "memory": 36824, "step": 69373}
{"lr": 0.0013841806725708992, "data_time": 0.0049855709075927734, "grad_norm": 0.15663600265979766, "loss": 0.4819908082485199, "time": 1.7726701974868775, "epoch": 222, "memory": 36824, "step": 69473}
{"lr": 0.0013833643571381786, "data_time": 0.3106598615646362, "grad_norm": 0.1519910752773285, "loss": 0.47875781953334806, "time": 1.8734225511550904, "epoch": 223, "memory": 36824, "step": 69586}
{"lr": 0.0013826408704314126, "data_time": 0.002292442321777344, "grad_norm": 0.14079128280282022, "loss": 0.48114549219608305, "time": 1.8985119342803956, "epoch": 223, "memory": 36824, "step": 69686}
{"lr": 0.0013819163675115934, "data_time": 0.003256702423095703, "grad_norm": 0.18004499822854997, "loss": 0.4832508832216263, "time": 1.7871602296829223, "epoch": 223, "memory": 36824, "step": 69786}
{"lr": 0.0013810964578373024, "data_time": 0.016169357299804687, "grad_norm": 0.16319513618946074, "loss": 0.4882427781820297, "time": 1.837359881401062, "epoch": 224, "memory": 36824, "step": 69899}
{"lr": 0.0013803697945981945, "data_time": 0.0023032665252685548, "grad_norm": 0.14082239270210267, "loss": 0.48430615067481997, "time": 1.8446102619171143, "epoch": 224, "memory": 36824, "step": 69999}
{"lr": 0.0013796421191071312, "data_time": 0.0038422346115112305, "grad_norm": 0.14927779883146286, "loss": 0.48357015252113345, "time": 1.778607392311096, "epoch": 224, "memory": 36824, "step": 70099}
{"lr": 0.0013788186292012625, "data_time": 0.14322855472564697, "grad_norm": 0.1592566415667534, "loss": 0.48611961007118226, "time": 1.8651113986968995, "epoch": 225, "memory": 36824, "step": 70212}
{"lr": 0.0013780888018464813, "data_time": 0.0030631303787231447, "grad_norm": 0.1572972819209099, "loss": 0.4867266982793808, "time": 1.949214243888855, "epoch": 225, "memory": 36824, "step": 70312}
{"lr": 0.0013773579662181398, "data_time": 0.0029363155364990233, "grad_norm": 0.17990368306636811, "loss": 0.48086797893047334, "time": 1.818639373779297, "epoch": 225, "memory": 36824, "step": 70412}
{"lr": 0.0013765309101518635, "data_time": 0.31724088191986083, "grad_norm": 0.15896094292402269, "loss": 0.48687304854393004, "time": 1.8604278326034547, "epoch": 226, "memory": 36824, "step": 70525}
{"lr": 0.001375797931152147, "data_time": 0.0024926185607910155, "grad_norm": 0.14636934846639632, "loss": 0.4816989481449127, "time": 1.910627579689026, "epoch": 226, "memory": 36824, "step": 70625}
{"lr": 0.0013750639478744909, "data_time": 0.002868461608886719, "grad_norm": 0.1416740708053112, "loss": 0.4849226623773575, "time": 1.804677391052246, "epoch": 226, "memory": 36824, "step": 70725}
{"lr": 0.0013742333397799148, "data_time": 0.16100819110870362, "grad_norm": 0.15681503415107728, "loss": 0.48421470522880555, "time": 1.840892767906189, "epoch": 227, "memory": 36824, "step": 70838}
{"lr": 0.0013734972216598487, "data_time": 0.0024309396743774415, "grad_norm": 0.162234927713871, "loss": 0.4848584204912186, "time": 1.8577174425125123, "epoch": 227, "memory": 36824, "step": 70938}
{"lr": 0.0013727601032746276, "data_time": 0.003376412391662598, "grad_norm": 0.14811394810676576, "loss": 0.48416371643543243, "time": 1.828880286216736, "epoch": 227, "memory": 36824, "step": 71038}
{"lr": 0.001371925957344549, "data_time": 0.29180397987365725, "grad_norm": 0.15057269036769866, "loss": 0.48485279381275176, "time": 2.071231961250305, "epoch": 228, "memory": 36824, "step": 71151}
{"lr": 0.001371186712682364, "data_time": 0.002577853202819824, "grad_norm": 0.14977768808603287, "loss": 0.485412460565567, "time": 1.934619164466858, "epoch": 228, "memory": 36824, "step": 71251}
{"lr": 0.0013704464717848967, "data_time": 0.00395195484161377, "grad_norm": 0.1550478719174862, "loss": 0.48281430304050443, "time": 1.928794026374817, "epoch": 228, "memory": 36824, "step": 71351}
{"lr": 0.0013696088022725704, "data_time": 0.18148550987243653, "grad_norm": 0.1697886660695076, "loss": 0.4888741672039032, "time": 1.8426074266433716, "epoch": 229, "memory": 36824, "step": 71464}
{"lr": 0.0013688664436999184, "data_time": 0.0025716304779052736, "grad_norm": 0.164222452044487, "loss": 0.4855283975601196, "time": 1.8201170921325684, "epoch": 229, "memory": 36824, "step": 71564}
{"lr": 0.00136812309293888, "data_time": 0.0034531354904174805, "grad_norm": 0.1689108729362488, "loss": 0.48310700356960296, "time": 1.8576790809631347, "epoch": 229, "memory": 36824, "step": 71664}
{"lr": 0.0013672819141577683, "data_time": 0.34740324020385743, "grad_norm": 0.1740519866347313, "loss": 0.486845937371254, "time": 2.048407769203186, "epoch": 230, "memory": 36824, "step": 71777}
{"lr": 0.0013665364543595068, "data_time": 0.0025573015213012696, "grad_norm": 0.14872922003269196, "loss": 0.48098466694355013, "time": 1.8529795408248901, "epoch": 230, "memory": 36824, "step": 71877}
{"lr": 0.0013657900064367116, "data_time": 0.0034998178482055662, "grad_norm": 0.14693633764982222, "loss": 0.4868544518947601, "time": 1.8240595817565919, "epoch": 230, "memory": 36824, "step": 71977}
{"lr": 0.0013649453327602367, "data_time": 0.002665543556213379, "grad_norm": 0.14782846346497536, "loss": 0.4824395030736923, "time": 1.8290847539901733, "epoch": 231, "memory": 36824, "step": 72090}
{"lr": 0.0013641967844742187, "data_time": 0.0023587942123413086, "grad_norm": 0.149460668861866, "loss": 0.4856099247932434, "time": 1.8298315048217773, "epoch": 231, "memory": 36824, "step": 72190}
{"lr": 0.0013634472521443987, "data_time": 0.0028009653091430665, "grad_norm": 0.15271753519773484, "loss": 0.48005187809467315, "time": 1.901463794708252, "epoch": 231, "memory": 36824, "step": 72290}
{"lr": 0.0013625990980057035, "data_time": 0.28749613761901854, "grad_norm": 0.18777385205030442, "loss": 0.4814878284931183, "time": 1.852297568321228, "epoch": 232, "memory": 36824, "step": 72403}
{"lr": 0.0013618474740225506, "data_time": 0.002336883544921875, "grad_norm": 0.1454724371433258, "loss": 0.48532001972198485, "time": 1.8472466230392457, "epoch": 232, "memory": 36824, "step": 72503}
{"lr": 0.0013610948700931492, "data_time": 0.0029847145080566405, "grad_norm": 0.16929301023483276, "loss": 0.48108480870723724, "time": 1.7874080896377564, "epoch": 232, "memory": 36824, "step": 72603}
{"lr": 0.0013602432499848487, "data_time": 0.0646932601928711, "grad_norm": 0.1579933539032936, "loss": 0.487488666176796, "time": 1.9074979782104493, "epoch": 233, "memory": 36824, "step": 72716}
{"lr": 0.0013594885631477444, "data_time": 0.002527189254760742, "grad_norm": 0.12913338020443915, "loss": 0.4852654755115509, "time": 1.857899808883667, "epoch": 233, "memory": 36824, "step": 72816}
{"lr": 0.0013587329004786835, "data_time": 0.0035451412200927734, "grad_norm": 0.1609171599149704, "loss": 0.4820512145757675, "time": 1.854414439201355, "epoch": 233, "memory": 36824, "step": 72916}
{"lr": 0.0013578778289526157, "data_time": 0.23073384761810303, "grad_norm": 0.14681339412927627, "loss": 0.4812871187925339, "time": 1.8302327394485474, "epoch": 234, "memory": 36824, "step": 73029}
{"lr": 0.0013571200921570757, "data_time": 0.002412104606628418, "grad_norm": 0.1626623496413231, "loss": 0.48446099162101747, "time": 1.8062664985656738, "epoch": 234, "memory": 36824, "step": 73129}
{"lr": 0.0013563613836605497, "data_time": 0.0034571409225463865, "grad_norm": 0.15127917230129242, "loss": 0.4817559033632278, "time": 1.8943265914916991, "epoch": 234, "memory": 36824, "step": 73229}
{"lr": 0.0013555028753275253, "data_time": 0.002680683135986328, "grad_norm": 0.15968766063451767, "loss": 0.4828383713960648, "time": 1.8672200202941895, "epoch": 235, "memory": 36824, "step": 73342}
{"lr": 0.001354742101521179, "data_time": 0.002700948715209961, "grad_norm": 0.15941371023654938, "loss": 0.48107819855213163, "time": 1.8190680503845216, "epoch": 235, "memory": 36824, "step": 73442}
{"lr": 0.0013539803601614157, "data_time": 0.0035394668579101563, "grad_norm": 0.16084563732147217, "loss": 0.4839111179113388, "time": 1.8450066566467285, "epoch": 235, "memory": 36824, "step": 73542}
{"lr": 0.0013531184296909763, "data_time": 0.17309262752532958, "grad_norm": 0.1708119750022888, "loss": 0.48196337521076205, "time": 1.8772204637527465, "epoch": 236, "memory": 36824, "step": 73655}
{"lr": 0.0013523546318733501, "data_time": 0.0027045965194702148, "grad_norm": 0.18669128865003587, "loss": 0.484685018658638, "time": 1.899194073677063, "epoch": 236, "memory": 36824, "step": 73755}
{"lr": 0.0013515898706664136, "data_time": 0.0033834695816040037, "grad_norm": 0.15742669105529786, "loss": 0.4830022782087326, "time": 1.912669825553894, "epoch": 236, "memory": 36824, "step": 73855}
{"lr": 0.001350724532786571, "data_time": 0.3098158359527588, "grad_norm": 0.16645059287548064, "loss": 0.4797781229019165, "time": 1.985279631614685, "epoch": 237, "memory": 36824, "step": 73968}
{"lr": 0.001349957724008862, "data_time": 0.002424955368041992, "grad_norm": 0.16431880742311478, "loss": 0.48385034799575805, "time": 1.8813722372055053, "epoch": 237, "memory": 36824, "step": 74068}
{"lr": 0.0013491899560224094, "data_time": 0.0033548831939697265, "grad_norm": 0.16313125789165497, "loss": 0.48393929600715635, "time": 2.252277636528015, "epoch": 237, "memory": 36824, "step": 74168}
{"lr": 0.0013483212255194013, "data_time": 0.048781847953796385, "grad_norm": 0.15550560206174852, "loss": 0.4829013794660568, "time": 1.8086253166198731, "epoch": 238, "memory": 36824, "step": 74281}
{"lr": 0.0013475514188842554, "data_time": 0.002697253227233887, "grad_norm": 0.14093910083174704, "loss": 0.4828924298286438, "time": 1.8710704565048217, "epoch": 238, "memory": 36824, "step": 74381}
{"lr": 0.001346780657237323, "data_time": 0.0026340723037719727, "grad_norm": 0.17136573642492295, "loss": 0.4816576212644577, "time": 1.881612730026245, "epoch": 238, "memory": 36824, "step": 74481}
{"lr": 0.001345908548955357, "data_time": 0.1884401559829712, "grad_norm": 0.1545127458870411, "loss": 0.4828323870897293, "time": 1.9123696565628052, "epoch": 239, "memory": 36824, "step": 74594}
{"lr": 0.0013451357576166453, "data_time": 0.0025395870208740233, "grad_norm": 0.1849089652299881, "loss": 0.4805834174156189, "time": 1.8561906814575195, "epoch": 239, "memory": 36824, "step": 74694}
{"lr": 0.0013443620154794244, "data_time": 0.0033693552017211915, "grad_norm": 0.15676687210798262, "loss": 0.48418191969394686, "time": 2.1365533828735352, "epoch": 239, "memory": 36824, "step": 74794}
{"lr": 0.0013434865443204254, "data_time": 0.3052663803100586, "grad_norm": 0.15525953248143196, "loss": 0.4858451157808304, "time": 1.911409592628479, "epoch": 240, "memory": 36824, "step": 74907}
{"lr": 0.0013427107814830213, "data_time": 0.0028578996658325194, "grad_norm": 0.15070439726114274, "loss": 0.4820707321166992, "time": 1.8528280019760133, "epoch": 240, "memory": 36824, "step": 75007}
{"lr": 0.0013419340720766282, "data_time": 0.002822589874267578, "grad_norm": 0.17516168355941772, "loss": 0.4802543580532074, "time": 1.8322125911712646, "epoch": 240, "memory": 36824, "step": 75107}
{"lr": 0.0013410552529999813, "data_time": 0.03380887508392334, "grad_norm": 0.15033247470855712, "loss": 0.4872234344482422, "time": 1.8618531942367553, "epoch": 241, "memory": 36824, "step": 75220}
{"lr": 0.0013402765319195304, "data_time": 0.002477598190307617, "grad_norm": 0.1556535169482231, "loss": 0.48145683109760284, "time": 1.8693603992462158, "epoch": 241, "memory": 36824, "step": 75320}
{"lr": 0.0013394968685157883, "data_time": 0.004048752784729004, "grad_norm": 0.1747414268553257, "loss": 0.48345259130001067, "time": 1.8919970989227295, "epoch": 241, "memory": 36824, "step": 75420}
{"lr": 0.0013386147165380857, "data_time": 0.5930778026580811, "grad_norm": 0.1844172492623329, "loss": 0.4838539183139801, "time": 1.8732709169387818, "epoch": 242, "memory": 36824, "step": 75533}
{"lr": 0.0013378330505207847, "data_time": 0.34124293327331545, "grad_norm": 0.1542141243815422, "loss": 0.4829935908317566, "time": 1.8749224901199342, "epoch": 242, "memory": 36824, "step": 75633}
{"lr": 0.0013370504464419873, "data_time": 0.0028365612030029296, "grad_norm": 0.17605492025613784, "loss": 0.4819949984550476, "time": 1.8602602005004882, "epoch": 242, "memory": 36824, "step": 75733}
{"lr": 0.001336164976636776, "data_time": 0.29975292682647703, "grad_norm": 0.16148972809314727, "loss": 0.4857534974813461, "time": 1.847831654548645, "epoch": 243, "memory": 36824, "step": 75846}
{"lr": 0.0013353803790391357, "data_time": 0.00290071964263916, "grad_norm": 0.19814031571149826, "loss": 0.4843493580818176, "time": 1.8962692737579345, "epoch": 243, "memory": 36824, "step": 75946}
{"lr": 0.0013345948476578288, "data_time": 0.003733372688293457, "grad_norm": 0.1491330847144127, "loss": 0.4856736183166504, "time": 1.9025542259216308, "epoch": 243, "memory": 36824, "step": 76046}
{"lr": 0.0013337060751553418, "data_time": 0.42028398513793946, "grad_norm": 0.16327501609921455, "loss": 0.4825188785791397, "time": 1.8332115888595581, "epoch": 244, "memory": 36824, "step": 76159}
{"lr": 0.0013329185593839736, "data_time": 0.0023257970809936524, "grad_norm": 0.16276891827583312, "loss": 0.4825170636177063, "time": 1.8251164913177491, "epoch": 244, "memory": 36824, "step": 76259}
{"lr": 0.0013321301141227197, "data_time": 0.003102707862854004, "grad_norm": 0.1719866469502449, "loss": 0.47907252311706544, "time": 1.8415830612182618, "epoch": 244, "memory": 36824, "step": 76359}
{"lr": 0.001331238054109626, "data_time": 0.16187331676483155, "grad_norm": 0.16350530236959457, "loss": 0.4819195032119751, "time": 1.9249197721481324, "epoch": 245, "memory": 36824, "step": 76472}
{"lr": 0.0013304476336210012, "data_time": 0.0026155948638916016, "grad_norm": 0.1702125370502472, "loss": 0.4811773031949997, "time": 2.06211998462677, "epoch": 245, "memory": 36824, "step": 76572}
{"lr": 0.0013296562879521572, "data_time": 0.0029171228408813475, "grad_norm": 0.15501829907298087, "loss": 0.4840173751115799, "time": 1.8234153270721436, "epoch": 245, "memory": 36824, "step": 76672}
{"lr": 0.001328760955671305, "data_time": 0.26416749954223634, "grad_norm": 0.16391115486621857, "loss": 0.4783328741788864, "time": 1.8519319295883179, "epoch": 246, "memory": 36824, "step": 76785}
{"lr": 0.0013279676439715302, "data_time": 0.0028108835220336916, "grad_norm": 0.149169659614563, "loss": 0.48044107258319857, "time": 1.9030342817306518, "epoch": 246, "memory": 36824, "step": 76885}
{"lr": 0.0013271734114170075, "data_time": 0.004045820236206055, "grad_norm": 0.15772337466478348, "loss": 0.48088730275630953, "time": 1.867690658569336, "epoch": 246, "memory": 36824, "step": 76985}
{"lr": 0.001326274822167154, "data_time": 0.0534421443939209, "grad_norm": 0.16036772727966309, "loss": 0.48585469722747804, "time": 1.8029584884643555, "epoch": 247, "memory": 36824, "step": 77098}
{"lr": 0.001325478632811733, "data_time": 0.05659430027008057, "grad_norm": 0.1473286360502243, "loss": 0.4832615703344345, "time": 2.2910024642944338, "epoch": 247, "memory": 36824, "step": 77198}
{"lr": 0.0013246815269427753, "data_time": 0.0027527570724487304, "grad_norm": 0.1503334291279316, "loss": 0.484858301281929, "time": 1.866309356689453, "epoch": 247, "memory": 36824, "step": 77298}
{"lr": 0.0013237796960783288, "data_time": 0.002437281608581543, "grad_norm": 0.15616809353232383, "loss": 0.48677596151828767, "time": 1.806966781616211, "epoch": 248, "memory": 36824, "step": 77411}
{"lr": 0.0013229806426719453, "data_time": 0.002489805221557617, "grad_norm": 0.16483001708984374, "loss": 0.47938292324542997, "time": 1.9090741872787476, "epoch": 248, "memory": 36824, "step": 77511}
{"lr": 0.0013221806771088935, "data_time": 0.003054904937744141, "grad_norm": 0.14812077432870865, "loss": 0.48138628602027894, "time": 1.895889163017273, "epoch": 248, "memory": 36824, "step": 77611}
{"lr": 0.0013212756200396568, "data_time": 0.22513580322265625, "grad_norm": 0.15570951104164124, "loss": 0.4825855314731598, "time": 1.8790313005447388, "epoch": 249, "memory": 36824, "step": 77724}
{"lr": 0.001320473716235927, "data_time": 0.0024422407150268555, "grad_norm": 0.16497478932142257, "loss": 0.48119122684001925, "time": 1.8673166275024413, "epoch": 249, "memory": 36824, "step": 77824}
{"lr": 0.0013196709046479843, "data_time": 0.0031386375427246093, "grad_norm": 0.16545308083295823, "loss": 0.4770423620939255, "time": 1.8523081541061401, "epoch": 249, "memory": 36824, "step": 77924}
{"lr": 0.0013187626368388848, "data_time": 0.26259632110595704, "grad_norm": 0.1534494385123253, "loss": 0.4828143984079361, "time": 1.8588539123535157, "epoch": 250, "memory": 36824, "step": 78037}
{"lr": 0.0013179578963401324, "data_time": 0.0025870084762573244, "grad_norm": 0.14573459327220917, "loss": 0.4806345462799072, "time": 1.8638402938842773, "epoch": 250, "memory": 36824, "step": 78137}
{"lr": 0.0013171522524451369, "data_time": 0.002801656723022461, "grad_norm": 0.16195836961269378, "loss": 0.4826097279787064, "time": 1.903167200088501, "epoch": 250, "memory": 36824, "step": 78237}
{"lr": 0.0013162407894159675, "data_time": 0.05484275817871094, "grad_norm": 0.17875736802816392, "loss": 0.4804446369409561, "time": 1.8571078538894654, "epoch": 251, "memory": 36824, "step": 78350}
{"lr": 0.0013154332259729881, "data_time": 0.002497076988220215, "grad_norm": 0.16645041704177857, "loss": 0.487038916349411, "time": 1.8470133066177368, "epoch": 251, "memory": 36824, "step": 78450}
{"lr": 0.0013146247635371687, "data_time": 0.002782726287841797, "grad_norm": 0.1601191997528076, "loss": 0.4825518518686295, "time": 2.010520815849304, "epoch": 251, "memory": 36824, "step": 78550}
{"lr": 0.0013137101208623156, "data_time": 0.13199076652526856, "grad_norm": 0.17166872024536134, "loss": 0.4836231738328934, "time": 1.9114925861358643, "epoch": 252, "memory": 36824, "step": 78663}
{"lr": 0.0013128997482741394, "data_time": 0.0025556325912475587, "grad_norm": 0.1592811495065689, "loss": 0.48184776604175567, "time": 1.8740501880645752, "epoch": 252, "memory": 36824, "step": 78763}
{"lr": 0.0013120884811118868, "data_time": 0.0028256654739379885, "grad_norm": 0.1546696200966835, "loss": 0.47922137677669524, "time": 1.8293545722961426, "epoch": 252, "memory": 36824, "step": 78863}
{"lr": 0.0013111706744200703, "data_time": 0.12813568115234375, "grad_norm": 0.17274154722690582, "loss": 0.48222185373306276, "time": 2.011630654335022, "epoch": 253, "memory": 36824, "step": 78976}
{"lr": 0.0013103575065337297, "data_time": 0.0024592399597167967, "grad_norm": 0.15024984404444694, "loss": 0.4798410892486572, "time": 1.8445698261260985, "epoch": 253, "memory": 36824, "step": 79076}
{"lr": 0.0013095434485073624, "data_time": 0.002729916572570801, "grad_norm": 0.17086193561553956, "loss": 0.48441309332847593, "time": 1.8349255323410034, "epoch": 253, "memory": 36824, "step": 79176}
{"lr": 0.001308622493481367, "data_time": 0.30516419410705564, "grad_norm": 0.16417340636253358, "loss": 0.48280046582221986, "time": 1.8411162853240968, "epoch": 254, "memory": 36824, "step": 79289}
{"lr": 0.0013078065441916563, "data_time": 0.0026018619537353516, "grad_norm": 0.1518864080309868, "loss": 0.4819998651742935, "time": 1.827248787879944, "epoch": 254, "memory": 36824, "step": 79389}
{"lr": 0.0013069897092111823, "data_time": 0.003594183921813965, "grad_norm": 0.1653088167309761, "loss": 0.48082988560199735, "time": 1.913268780708313, "epoch": 254, "memory": 36824, "step": 79489}
{"lr": 0.0013060656215875877, "data_time": 0.516741132736206, "grad_norm": 0.16398757249116896, "loss": 0.47827514708042146, "time": 2.210275149345398, "epoch": 255, "memory": 36824, "step": 79602}
{"lr": 0.0013052469048368297, "data_time": 0.002446627616882324, "grad_norm": 0.1644325315952301, "loss": 0.4806642383337021, "time": 1.8293844223022462, "epoch": 255, "memory": 36824, "step": 79702}
{"lr": 0.0013044273068597019, "data_time": 0.002778959274291992, "grad_norm": 0.16155097782611846, "loss": 0.4846877306699753, "time": 1.8139762878417969, "epoch": 255, "memory": 36824, "step": 79802}
{"lr": 0.0013035001024286179, "data_time": 0.053454232215881345, "grad_norm": 0.18685560375452043, "loss": 0.4890377163887024, "time": 1.8181527137756348, "epoch": 256, "memory": 36824, "step": 79915}
{"lr": 0.001302678632206424, "data_time": 0.002637314796447754, "grad_norm": 0.1586872398853302, "loss": 0.4820957869291306, "time": 1.8698686838150025, "epoch": 256, "memory": 36824, "step": 80015}
{"lr": 0.0013018562852373122, "data_time": 0.0029271125793457033, "grad_norm": 0.16278480738401413, "loss": 0.4820449501276016, "time": 1.834977889060974, "epoch": 256, "memory": 36824, "step": 80115}
{"lr": 0.001300925979842103, "data_time": 0.4922011375427246, "grad_norm": 0.1723363384604454, "loss": 0.4866482734680176, "time": 2.1557443141937256, "epoch": 257, "memory": 36824, "step": 80228}
{"lr": 0.00130010177018513, "data_time": 0.002254676818847656, "grad_norm": 0.16133795827627181, "loss": 0.48351451456546785, "time": 1.8788933515548707, "epoch": 257, "memory": 36824, "step": 80328}
{"lr": 0.0012992766882756737, "data_time": 0.003296208381652832, "grad_norm": 0.15350354611873626, "loss": 0.4821363866329193, "time": 1.8247533559799194, "epoch": 257, "memory": 36824, "step": 80428}
{"lr": 0.0012983432978126925, "data_time": 0.0025165557861328127, "grad_norm": 0.14986611604690553, "loss": 0.48461141884326936, "time": 1.864465069770813, "epoch": 258, "memory": 36824, "step": 80541}
{"lr": 0.0012975163628044114, "data_time": 0.002327585220336914, "grad_norm": 0.1731020390987396, "loss": 0.4780069410800934, "time": 1.9038297414779664, "epoch": 258, "memory": 36824, "step": 80641}
{"lr": 0.0012966885600529865, "data_time": 0.0032823562622070314, "grad_norm": 0.16524331420660018, "loss": 0.4821233004331589, "time": 2.913168478012085, "epoch": 258, "memory": 36824, "step": 80741}
{"lr": 0.0012957521004713008, "data_time": 0.2549973249435425, "grad_norm": Infinity, "loss": 0.4847829520702362, "time": 1.9470073699951171, "epoch": 259, "memory": 36824, "step": 80854}
{"lr": 0.0012949224542417495, "data_time": 0.0024561166763305666, "grad_norm": 0.1615828588604927, "loss": 0.48326529264450074, "time": 1.9309922456741333, "epoch": 259, "memory": 36824, "step": 80954}
{"lr": 0.0012940919447932187, "data_time": 0.0027848958969116213, "grad_norm": 0.16737180799245835, "loss": 0.48620631694793703, "time": 1.822647476196289, "epoch": 259, "memory": 36824, "step": 81054}
{"lr": 0.0012931524320943374, "data_time": 0.0027030229568481444, "grad_norm": 0.17327191829681396, "loss": 0.48129959106445314, "time": 1.8074920415878295, "epoch": 260, "memory": 36824, "step": 81167}
{"lr": 0.0012923200888198818, "data_time": 0.0025645732879638673, "grad_norm": 0.18734045028686525, "loss": 0.48076626360416413, "time": 1.8494446992874145, "epoch": 260, "memory": 36824, "step": 81267}
{"lr": 0.001291486886865362, "data_time": 0.003415632247924805, "grad_norm": 0.16881235390901567, "loss": 0.4806696385145187, "time": 1.7721127271652222, "epoch": 260, "memory": 36824, "step": 81367}
{"lr": 0.0012905443371029645, "data_time": 0.3618757247924805, "grad_norm": 0.16404956430196763, "loss": 0.4785799026489258, "time": 1.9117422342300414, "epoch": 261, "memory": 36824, "step": 81480}
{"lr": 0.0012897093110060538, "data_time": 0.13367486000061035, "grad_norm": 0.1560257963836193, "loss": 0.47850096225738525, "time": 2.200817012786865, "epoch": 261, "memory": 36824, "step": 81580}
{"lr": 0.001288873430782666, "data_time": 0.0038576126098632812, "grad_norm": 0.16817830950021745, "loss": 0.480554336309433, "time": 1.9758296728134155, "epoch": 261, "memory": 36824, "step": 81680}
{"lr": 0.001287927860062331, "data_time": 0.11405587196350098, "grad_norm": 0.17585982456803323, "loss": 0.4815626949071884, "time": 1.8275146484375, "epoch": 262, "memory": 36824, "step": 81793}
{"lr": 0.0012870901654112531, "data_time": 0.0024428367614746094, "grad_norm": 0.15740560442209245, "loss": 0.48509758710861206, "time": 1.9711516857147218, "epoch": 262, "memory": 36824, "step": 81893}
{"lr": 0.001286251621201884, "data_time": 0.003401446342468262, "grad_norm": 0.18796814531087874, "loss": 0.4830436259508133, "time": 2.0120280504226686, "epoch": 262, "memory": 36824, "step": 81993}
{"lr": 0.0012853030456808018, "data_time": 0.39721438884735105, "grad_norm": 0.16517599821090698, "loss": 0.4799623429775238, "time": 1.8905514478683472, "epoch": 263, "memory": 36824, "step": 82106}
{"lr": 0.0012844626967894526, "data_time": 0.002919292449951172, "grad_norm": 0.16368523687124253, "loss": 0.4772356927394867, "time": 1.8764458656311036, "epoch": 263, "memory": 36824, "step": 82206}
{"lr": 0.0012836215029225094, "data_time": 0.0030330657958984376, "grad_norm": 0.17289603650569915, "loss": 0.4788177877664566, "time": 1.8488474369049073, "epoch": 263, "memory": 36824, "step": 82306}
{"lr": 0.0012826699388092217, "data_time": 0.10651001930236817, "grad_norm": 0.15967240184545517, "loss": 0.480251145362854, "time": 1.8734750747680664, "epoch": 264, "memory": 36824, "step": 82419}
{"lr": 0.0012818269500368424, "data_time": 0.002527809143066406, "grad_norm": 0.15948270857334138, "loss": 0.48354973196983336, "time": 1.852026891708374, "epoch": 264, "memory": 36824, "step": 82519}
{"lr": 0.0012809831208860085, "data_time": 0.0033673524856567385, "grad_norm": 0.1622668370604515, "loss": 0.48356168270111083, "time": 1.9161957263946534, "epoch": 264, "memory": 36824, "step": 82619}
{"lr": 0.0012800285844401186, "data_time": 0.4123469352722168, "grad_norm": 0.1588193655014038, "loss": 0.4826116919517517, "time": 1.8766971588134767, "epoch": 265, "memory": 36824, "step": 82732}
{"lr": 0.0012791829701910631, "data_time": 0.002452254295349121, "grad_norm": 0.1590607173740864, "loss": 0.4807900220155716, "time": 1.8625582218170167, "epoch": 265, "memory": 36824, "step": 82832}
{"lr": 0.001278336520175046, "data_time": 0.0031069517135620117, "grad_norm": 0.15654047280550004, "loss": 0.4831429421901703, "time": 1.8231324195861816, "epoch": 265, "memory": 36824, "step": 82932}
{"lr": 0.0012773790277069497, "data_time": 0.15782468318939208, "grad_norm": 0.17174840718507767, "loss": 0.48456639647483823, "time": 1.8823207855224608, "epoch": 266, "memory": 36824, "step": 83045}
{"lr": 0.001276530802430431, "data_time": 0.0027122259140014648, "grad_norm": 0.14968386441469192, "loss": 0.4829812079668045, "time": 1.8101454973220825, "epoch": 266, "memory": 36824, "step": 83145}
{"lr": 0.0012756817460127328, "data_time": 0.003422451019287109, "grad_norm": 0.16827588081359862, "loss": 0.48083928525447844, "time": 1.8142347812652588, "epoch": 266, "memory": 36824, "step": 83245}
{"lr": 0.0012747213138833324, "data_time": 0.4428880214691162, "grad_norm": 0.1823277696967125, "loss": 0.4803676664829254, "time": 2.098859429359436, "epoch": 267, "memory": 36824, "step": 83358}
{"lr": 0.0012738704920731813, "data_time": 0.003230905532836914, "grad_norm": 0.16373598873615264, "loss": 0.48268823325634, "time": 1.9271378993988038, "epoch": 267, "memory": 36824, "step": 83458}
{"lr": 0.00127301884376183, "data_time": 0.002655816078186035, "grad_norm": 0.17218803614377975, "loss": 0.4770577996969223, "time": 1.8429740428924561, "epoch": 267, "memory": 36824, "step": 83558}
{"lr": 0.0012720554883822626, "data_time": 0.3015883445739746, "grad_norm": 0.1588240534067154, "loss": 0.4830025374889374, "time": 1.840919589996338, "epoch": 268, "memory": 36824, "step": 83671}
{"lr": 0.0012712020845766757, "data_time": 0.0025085687637329103, "grad_norm": 0.15849525928497316, "loss": 0.48396613597869875, "time": 1.9311139822006225, "epoch": 268, "memory": 36824, "step": 83771}
{"lr": 0.0012703478589239905, "data_time": 0.0029242515563964845, "grad_norm": 0.17655299752950668, "loss": 0.4812524676322937, "time": 1.855148935317993, "epoch": 268, "memory": 36824, "step": 83871}
{"lr": 0.0012693815967553396, "data_time": 0.308345103263855, "grad_norm": 0.17975728809833527, "loss": 0.48144546151161194, "time": 1.8751534700393677, "epoch": 269, "memory": 36824, "step": 83984}
{"lr": 0.0012685256255366329, "data_time": 0.002375197410583496, "grad_norm": 0.16679855212569236, "loss": 0.47721209824085237, "time": 1.8450364589691162, "epoch": 269, "memory": 36824, "step": 84084}
{"lr": 0.001267668837138975, "data_time": 0.0029822349548339843, "grad_norm": 0.15490733832120895, "loss": 0.4818566530942917, "time": 1.84546639919281, "epoch": 269, "memory": 36824, "step": 84184}
{"lr": 0.0012666996846919971, "data_time": 0.017201733589172364, "grad_norm": 0.15830917805433273, "loss": 0.4792897492647171, "time": 1.8501670598983764, "epoch": 270, "memory": 36824, "step": 84297}
{"lr": 0.001265841160686357, "data_time": 0.002331399917602539, "grad_norm": 0.19428761154413224, "loss": 0.48055826127529144, "time": 1.8834847927093505, "epoch": 270, "memory": 36824, "step": 84397}
{"lr": 0.0012649818241838747, "data_time": 0.0029845237731933594, "grad_norm": 0.16883474886417388, "loss": 0.4837082982063293, "time": 1.8485052585601807, "epoch": 270, "memory": 36824, "step": 84497}
{"lr": 0.0012640097980187105, "data_time": 0.028054571151733397, "grad_norm": 0.1633654475212097, "loss": 0.48291192650794984, "time": 1.8613744020462035, "epoch": 271, "memory": 36824, "step": 84610}
{"lr": 0.0012631487358959433, "data_time": 0.0026109933853149412, "grad_norm": 0.1717364266514778, "loss": 0.48056460916996, "time": 1.8741584300994873, "epoch": 271, "memory": 36824, "step": 84710}
{"lr": 0.001262286865972327, "data_time": 0.0028301239013671874, "grad_norm": 0.1890076458454132, "loss": 0.48563366532325747, "time": 1.827986216545105, "epoch": 271, "memory": 36824, "step": 84810}
{"lr": 0.0012613119826982207, "data_time": 0.3098564147949219, "grad_norm": 0.15663193613290788, "loss": 0.48605166673660277, "time": 1.8293113708496094, "epoch": 272, "memory": 36824, "step": 84923}
{"lr": 0.0012604483971715029, "data_time": 0.08705525398254395, "grad_norm": 0.17172499224543572, "loss": 0.48336837291717527, "time": 1.8588222503662108, "epoch": 272, "memory": 36824, "step": 85023}
{"lr": 0.0012595840085537315, "data_time": 0.0032616853713989258, "grad_norm": 0.17945503294467927, "loss": 0.47946291863918306, "time": 1.9153637409210205, "epoch": 272, "memory": 36824, "step": 85123}
{"lr": 0.0012586062848287491, "data_time": 0.0434781551361084, "grad_norm": 0.16871217042207717, "loss": 0.48889370262622833, "time": 1.940173292160034, "epoch": 273, "memory": 36824, "step": 85236}
{"lr": 0.0012577401906543766, "data_time": 0.002834773063659668, "grad_norm": 0.1808316707611084, "loss": 0.47954065203666685, "time": 1.8482539892196654, "epoch": 273, "memory": 36824, "step": 85336}
{"lr": 0.0012568732981124693, "data_time": 0.0029367685317993166, "grad_norm": 0.16914909780025483, "loss": 0.4851475715637207, "time": 2.048939895629883, "epoch": 273, "memory": 36824, "step": 85436}
{"lr": 0.0012558927506432127, "data_time": 0.37883782386779785, "grad_norm": 0.1877358853816986, "loss": 0.4802216202020645, "time": 1.9292877435684204, "epoch": 274, "memory": 36824, "step": 85549}
{"lr": 0.0012550241626203425, "data_time": 0.015120267868041992, "grad_norm": 0.1696089655160904, "loss": 0.4798620522022247, "time": 1.8434340476989746, "epoch": 274, "memory": 36824, "step": 85649}
{"lr": 0.0012541547809670971, "data_time": 0.0027405977249145507, "grad_norm": 0.1784524753689766, "loss": 0.4820327669382095, "time": 1.8525402069091796, "epoch": 274, "memory": 36824, "step": 85749}
{"lr": 0.001253171426508415, "data_time": 0.1641085386276245, "grad_norm": 0.1684500828385353, "loss": 0.48279846012592315, "time": 1.8426485300064086, "epoch": 275, "memory": 36824, "step": 85862}
{"lr": 0.001252300359478819, "data_time": 0.0026552677154541016, "grad_norm": 0.16230116412043571, "loss": 0.47933209240436553, "time": 1.801612138748169, "epoch": 275, "memory": 36824, "step": 85962}
{"lr": 0.0012514285035695692, "data_time": 0.0035275936126708983, "grad_norm": 0.16812389492988586, "loss": 0.4849183827638626, "time": 1.8646655082702637, "epoch": 275, "memory": 36824, "step": 86062}
{"lr": 0.0012504423589242788, "data_time": 0.5058850526809693, "grad_norm": 0.20864929705858232, "loss": 0.48262868225574496, "time": 2.1941665172576905, "epoch": 276, "memory": 36824, "step": 86175}
{"lr": 0.0012495688277720932, "data_time": 0.002406406402587891, "grad_norm": 0.17533472925424576, "loss": 0.48224988877773284, "time": 1.8727567434310912, "epoch": 276, "memory": 36824, "step": 86275}
{"lr": 0.0012486945125044528, "data_time": 0.0036488771438598633, "grad_norm": 0.17909523993730544, "loss": 0.48533945679664614, "time": 1.9218060731887818, "epoch": 276, "memory": 36824, "step": 86375}
{"lr": 0.0012477055945230428, "data_time": 0.19688873291015624, "grad_norm": 0.17094181329011918, "loss": 0.478045666217804, "time": 1.8902343273162843, "epoch": 277, "memory": 36824, "step": 86488}
{"lr": 0.001246829614174501, "data_time": 0.0025310516357421875, "grad_norm": 0.16023070961236954, "loss": 0.48128001391887665, "time": 1.8885507822036742, "epoch": 277, "memory": 36824, "step": 86588}
{"lr": 0.001245952854488107, "data_time": 0.0035169124603271484, "grad_norm": 0.17706397473812102, "loss": 0.4779992014169693, "time": 1.8561987161636353, "epoch": 277, "memory": 36824, "step": 86688}
{"lr": 0.0012449611800684575, "data_time": 0.4624266386032104, "grad_norm": 0.16218092069029807, "loss": 0.4791425347328186, "time": 2.1873727560043337, "epoch": 278, "memory": 36824, "step": 86801}
{"lr": 0.00124408276549165, "data_time": 0.002553439140319824, "grad_norm": 0.1859918415546417, "loss": 0.482524573802948, "time": 1.8115650177001954, "epoch": 278, "memory": 36824, "step": 86901}
{"lr": 0.0012432035763679042, "data_time": 0.003340268135070801, "grad_norm": 0.16578012108802795, "loss": 0.48186502754688265, "time": 1.8853053092956542, "epoch": 278, "memory": 36824, "step": 87001}
{"lr": 0.001242209162454997, "data_time": 0.06506755352020263, "grad_norm": 0.1798197880387306, "loss": 0.48436143696308137, "time": 1.8622668027877807, "epoch": 279, "memory": 36824, "step": 87114}
{"lr": 0.0012413283286596022, "data_time": 0.002477598190307617, "grad_norm": 0.18908702880144118, "loss": 0.4783125758171082, "time": 1.8802393674850464, "epoch": 279, "memory": 36824, "step": 87214}
{"lr": 0.0012404467251214215, "data_time": 0.002804255485534668, "grad_norm": 0.17094190865755082, "loss": 0.48081402480602264, "time": 1.8540230512619018, "epoch": 279, "memory": 36824, "step": 87314}
{"lr": 0.0012394495887070455, "data_time": 0.4245533227920532, "grad_norm": 0.17864836901426315, "loss": 0.48201326429843905, "time": 1.80762038230896, "epoch": 280, "memory": 36824, "step": 87427}
{"lr": 0.0012385663507440852, "data_time": 0.07674381732940674, "grad_norm": 0.17529209703207016, "loss": 0.4826300054788589, "time": 1.8911869287490846, "epoch": 280, "memory": 36824, "step": 87527}
{"lr": 0.0012376823478556411, "data_time": 0.0027811288833618163, "grad_norm": 0.1693097934126854, "loss": 0.4755985260009766, "time": 1.8506044387817382, "epoch": 280, "memory": 36824, "step": 87627}
{"lr": 0.001236682505978107, "data_time": 0.2706346273422241, "grad_norm": 0.185646216571331, "loss": 0.4801160514354706, "time": 1.8831587791442872, "epoch": 281, "memory": 36824, "step": 87740}
{"lr": 0.001235796878939679, "data_time": 0.0028038740158081053, "grad_norm": 0.25574619844555857, "loss": 0.47906226813793185, "time": 1.8624899625778197, "epoch": 281, "memory": 36824, "step": 87840}
{"lr": 0.0012349104918061439, "data_time": 0.0028882503509521486, "grad_norm": 0.15603246539831161, "loss": 0.47821242809295655, "time": 1.832064151763916, "epoch": 281, "memory": 36824, "step": 87940}
{"lr": 0.0012339079615499898, "data_time": 0.0896073818206787, "grad_norm": 0.161756931245327, "loss": 0.48135738372802733, "time": 1.826097321510315, "epoch": 282, "memory": 36824, "step": 88053}
{"lr": 0.001233019960569019, "data_time": 0.0026724815368652345, "grad_norm": 0.1549589902162552, "loss": 0.4832117885351181, "time": 1.9011346340179442, "epoch": 282, "memory": 36824, "step": 88153}
{"lr": 0.0012321312043363018, "data_time": 0.0035291433334350584, "grad_norm": 0.16861541271209718, "loss": 0.4788551479578018, "time": 1.8613276481628418, "epoch": 282, "memory": 36824, "step": 88253}
{"lr": 0.0012311260028320037, "data_time": 0.060114145278930664, "grad_norm": 0.18009046465158463, "loss": 0.48404496908187866, "time": 1.8448899030685424, "epoch": 283, "memory": 36824, "step": 88366}
{"lr": 0.001230235643081975, "data_time": 0.0023554563522338867, "grad_norm": 0.16682565063238144, "loss": 0.48086141645908353, "time": 1.8585777044296266, "epoch": 283, "memory": 36824, "step": 88466}
{"lr": 0.0012293445329364676, "data_time": 0.0030233144760131838, "grad_norm": 0.1910279244184494, "loss": 0.48276451230049133, "time": 1.8628387928009034, "epoch": 283, "memory": 36824, "step": 88566}
{"lr": 0.001228336677360145, "data_time": 0.2759974718093872, "grad_norm": 0.17071977108716965, "loss": 0.4843105971813202, "time": 1.8380654573440551, "epoch": 284, "memory": 36824, "step": 88679}
{"lr": 0.0012274439740548504, "data_time": 0.002493000030517578, "grad_norm": 0.18279451578855516, "loss": 0.48683769404888155, "time": 1.8796708106994628, "epoch": 284, "memory": 36824, "step": 88779}
{"lr": 0.0012265505252231663, "data_time": 0.0028530120849609374, "grad_norm": 0.20518281161785126, "loss": 0.4839979648590088, "time": 1.922701907157898, "epoch": 284, "memory": 36824, "step": 88879}
{"lr": 0.0012255400327962913, "data_time": 0.14635851383209228, "grad_norm": 0.1735308900475502, "loss": 0.47862088680267334, "time": 1.7773293733596802, "epoch": 285, "memory": 36824, "step": 88992}
{"lr": 0.001224645001189567, "data_time": 0.0026392459869384764, "grad_norm": 0.1845301792025566, "loss": 0.47772026658058164, "time": 1.7770188808441163, "epoch": 285, "memory": 36824, "step": 89092}
{"lr": 0.0012237492289382805, "data_time": 0.003421378135681152, "grad_norm": 0.1774527870118618, "loss": 0.48139275312423707, "time": 1.8208734512329101, "epoch": 285, "memory": 36824, "step": 89192}
{"lr": 0.0012227361169273802, "data_time": 0.3109501838684082, "grad_norm": 0.1576695591211319, "loss": 0.48143510818481444, "time": 1.76846604347229, "epoch": 286, "memory": 36824, "step": 89305}
{"lr": 0.0012218387723128485, "data_time": 0.0027611970901489256, "grad_norm": 0.17398348897695542, "loss": 0.48459228277206423, "time": 1.8728353023529052, "epoch": 286, "memory": 36824, "step": 89405}
{"lr": 0.0012209406919482376, "data_time": 0.0028142213821411135, "grad_norm": 0.16671264544129372, "loss": 0.4795283913612366, "time": 1.8226284503936767, "epoch": 286, "memory": 36824, "step": 89505}
{"lr": 0.0012199249776646019, "data_time": 0.0025427818298339845, "grad_norm": 0.1604778900742531, "loss": 0.47885760068893435, "time": 1.7921372413635255, "epoch": 287, "memory": 36824, "step": 89618}
{"lr": 0.0012190253353754046, "data_time": 0.0029241561889648436, "grad_norm": 0.17398833185434343, "loss": 0.48139168620109557, "time": 1.9016986608505249, "epoch": 287, "memory": 36824, "step": 89718}
{"lr": 0.0012181249622431835, "data_time": 0.0037847280502319334, "grad_norm": 0.17868915647268296, "loss": 0.48546372056007386, "time": 1.7897377014160156, "epoch": 287, "memory": 36824, "step": 89818}
{"lr": 0.0012171066630425667, "data_time": 0.11839406490325928, "grad_norm": 0.15763043463230134, "loss": 0.48211602568626405, "time": 1.8541191339492797, "epoch": 288, "memory": 36824, "step": 89931}
{"lr": 0.0012162047384511087, "data_time": 0.00262148380279541, "grad_norm": 0.17697622925043105, "loss": 0.4818841189146042, "time": 1.7682947635650634, "epoch": 288, "memory": 36824, "step": 90031}
{"lr": 0.0012153020879361715, "data_time": 0.002679324150085449, "grad_norm": 0.1578199692070484, "loss": 0.4819759547710419, "time": 1.8329381942749023, "epoch": 288, "memory": 36824, "step": 90131}
{"lr": 0.0012142812212184977, "data_time": 0.1873555898666382, "grad_norm": 0.1740087866783142, "loss": 0.48304367661476133, "time": 1.8267927646636963, "epoch": 289, "memory": 36824, "step": 90244}
{"lr": 0.0012133770297361826, "data_time": 0.0027408838272094727, "grad_norm": 0.17343268543481827, "loss": 0.48213688731193544, "time": 1.8155649900436401, "epoch": 289, "memory": 36824, "step": 90344}
{"lr": 0.001212472117262337, "data_time": 0.0030435562133789063, "grad_norm": 0.17481610476970671, "loss": 0.4825053095817566, "time": 1.8985262393951416, "epoch": 289, "memory": 36824, "step": 90444}
{"lr": 0.0012114487004714002, "data_time": 0.14493069648742676, "grad_norm": 0.15917131006717683, "loss": 0.4816634863615036, "time": 1.8243169069290162, "epoch": 290, "memory": 36824, "step": 90557}
{"lr": 0.0012105422575483628, "data_time": 0.0024631500244140627, "grad_norm": 0.1870897516608238, "loss": 0.47976015508174896, "time": 1.831784725189209, "epoch": 290, "memory": 36824, "step": 90657}
{"lr": 0.0012096350985780623, "data_time": 0.002997279167175293, "grad_norm": 0.1616581380367279, "loss": 0.4797366142272949, "time": 1.802563524246216, "epoch": 290, "memory": 36824, "step": 90757}
{"lr": 0.0012086091492012332, "data_time": 0.30461204051971436, "grad_norm": 0.16381081491708754, "loss": 0.4804235249757767, "time": 1.8143672466278076, "epoch": 291, "memory": 36824, "step": 90870}
{"lr": 0.0012077004703260846, "data_time": 0.0024499893188476562, "grad_norm": 0.1878269150853157, "loss": 0.4810716599225998, "time": 1.8110769510269165, "epoch": 291, "memory": 36824, "step": 90970}
{"lr": 0.0012067910803601764, "data_time": 0.0036716699600219727, "grad_norm": 0.18033164590597153, "loss": 0.4814055681228638, "time": 1.9115105628967286, "epoch": 291, "memory": 36824, "step": 91070}
{"lr": 0.0012057626159280984, "data_time": 0.353055477142334, "grad_norm": 0.17838048338890075, "loss": 0.48092061579227446, "time": 1.8086806297302247, "epoch": 292, "memory": 36824, "step": 91183}
{"lr": 0.001204851716627651, "data_time": 0.07460002899169922, "grad_norm": 0.16210429966449738, "loss": 0.4812580943107605, "time": 1.806321120262146, "epoch": 292, "memory": 36824, "step": 91283}
{"lr": 0.0012039401112051004, "data_time": 0.0030486345291137694, "grad_norm": 0.15880074352025986, "loss": 0.4840744733810425, "time": 1.7788846492767334, "epoch": 292, "memory": 36824, "step": 91383}
{"lr": 0.0012029091492913942, "data_time": 0.37594032287597656, "grad_norm": 0.17253743410110473, "loss": 0.4806526035070419, "time": 1.8095511198043823, "epoch": 293, "memory": 36824, "step": 91496}
{"lr": 0.001201996045130408, "data_time": 0.17318282127380372, "grad_norm": 0.1602926343679428, "loss": 0.48093327283859255, "time": 1.8111080884933473, "epoch": 293, "memory": 36824, "step": 91596}
{"lr": 0.0012010822398280342, "data_time": 0.0027256250381469727, "grad_norm": 0.1709352597594261, "loss": 0.48223390281200407, "time": 2.117877554893494, "epoch": 293, "memory": 36824, "step": 91696}
{"lr": 0.0012000487980489932, "data_time": 0.06747386455535889, "grad_norm": 0.1687474027276039, "loss": 0.4788459241390228, "time": 1.765341567993164, "epoch": 294, "memory": 36824, "step": 91809}
{"lr": 0.0011991335046298973, "data_time": 0.002423214912414551, "grad_norm": 0.18114778995513917, "loss": 0.47747328877449036, "time": 1.816185998916626, "epoch": 294, "memory": 36824, "step": 91909}
{"lr": 0.001198217515062114, "data_time": 0.002893662452697754, "grad_norm": 0.16397275030612946, "loss": 0.47693454623222353, "time": 1.7710220575332642, "epoch": 294, "memory": 36824, "step": 92009}
{"lr": 0.001197181611076405, "data_time": 0.2823342323303223, "grad_norm": 0.18138360381126403, "loss": 0.47999157309532164, "time": 1.8500335693359375, "epoch": 295, "memory": 36824, "step": 92122}
{"lr": 0.0011962641440390368, "data_time": 0.002597498893737793, "grad_norm": 0.1826413407921791, "loss": 0.4784623861312866, "time": 1.8254334688186646, "epoch": 295, "memory": 36824, "step": 92222}
{"lr": 0.0011953459858575776, "data_time": 0.003804516792297363, "grad_norm": 0.17318937927484512, "loss": 0.4817989319562912, "time": 1.751843547821045, "epoch": 295, "memory": 36824, "step": 92322}
{"lr": 0.0011943076373659464, "data_time": 0.002396392822265625, "grad_norm": 0.15771397352218627, "loss": 0.4825486302375793, "time": 1.8584079027175904, "epoch": 296, "memory": 36824, "step": 92435}
{"lr": 0.0011933880123872872, "data_time": 0.002619147300720215, "grad_norm": 0.1741726890206337, "loss": 0.48078140020370486, "time": 1.7694855451583862, "epoch": 296, "memory": 36824, "step": 92535}
{"lr": 0.0011924677012809426, "data_time": 0.0033976078033447266, "grad_norm": 0.19230266362428666, "loss": 0.4850552350282669, "time": 1.820190715789795, "epoch": 296, "memory": 36824, "step": 92635}
{"lr": 0.001191426926025896, "data_time": 0.23155791759490968, "grad_norm": 0.18166644871234894, "loss": 0.48187524676322935, "time": 1.8623704433441162, "epoch": 297, "memory": 36824, "step": 92748}
{"lr": 0.0011905051588198014, "data_time": 0.0026247501373291016, "grad_norm": 0.1962566167116165, "loss": 0.479705610871315, "time": 1.816762661933899, "epoch": 297, "memory": 36824, "step": 92848}
{"lr": 0.001189582710514149, "data_time": 0.0033542871475219726, "grad_norm": 0.18363093733787536, "loss": 0.4768923670053482, "time": 1.8211130857467652, "epoch": 297, "memory": 36824, "step": 92948}
{"lr": 0.0011885395262796667, "data_time": 0.0027779579162597657, "grad_norm": 0.18277738243341446, "loss": 0.4816654145717621, "time": 1.7911088943481446, "epoch": 298, "memory": 36824, "step": 93061}
{"lr": 0.0011876156325965918, "data_time": 0.0024806737899780275, "grad_norm": 0.18833690285682678, "loss": 0.4866655856370926, "time": 1.8353798389434814, "epoch": 298, "memory": 36824, "step": 93161}
{"lr": 0.0011866910628537256, "data_time": 0.002872800827026367, "grad_norm": 0.1697993129491806, "loss": 0.4865182638168335, "time": 1.7626312017440795, "epoch": 298, "memory": 36824, "step": 93261}
{"lr": 0.0011856454874649466, "data_time": 0.015827393531799315, "grad_norm": 0.16017615050077438, "loss": 0.4822559505701065, "time": 1.8310603380203248, "epoch": 299, "memory": 36824, "step": 93374}
{"lr": 0.0011847194830916872, "data_time": 0.0026380062103271485, "grad_norm": 0.1894756704568863, "loss": 0.4845851272344589, "time": 1.729222011566162, "epoch": 299, "memory": 36824, "step": 93474}
{"lr": 0.001183792807709957, "data_time": 0.0037223100662231445, "grad_norm": 0.1720273584127426, "loss": 0.4851186066865921, "time": 1.788316512107849, "epoch": 299, "memory": 36824, "step": 93574}
{"lr": 0.0011827448590328799, "data_time": 0.3331287860870361, "grad_norm": 0.17570577412843705, "loss": 0.48478555381298066, "time": 1.8149412870407104, "epoch": 300, "memory": 36824, "step": 93687}
{"lr": 0.0011818167597922953, "data_time": 0.0026454687118530273, "grad_norm": 0.15449073314666747, "loss": 0.48146202862262727, "time": 1.76548273563385, "epoch": 300, "memory": 36824, "step": 93787}
{"lr": 0.001180887994606023, "data_time": 0.00286717414855957, "grad_norm": 0.17526071518659592, "loss": 0.4789988398551941, "time": 1.8020971298217774, "epoch": 300, "memory": 36824, "step": 93887}
{"lr": 0.0011798376905471988, "data_time": 0.0026382684707641603, "grad_norm": 0.20767512321472167, "loss": 0.47413002550601957, "time": 1.7985978603363038, "epoch": 301, "memory": 36824, "step": 94000}
{"lr": 0.0011789075122979433, "data_time": 0.0026653051376342774, "grad_norm": 0.17685802429914474, "loss": 0.4791023850440979, "time": 1.7786776065826415, "epoch": 301, "memory": 36824, "step": 94100}
{"lr": 0.0011779766731771653, "data_time": 0.002908158302307129, "grad_norm": 0.1929968848824501, "loss": 0.48389557003974915, "time": 1.7547727346420288, "epoch": 301, "memory": 36824, "step": 94200}
{"lr": 0.0011769240316833937, "data_time": 0.2574009418487549, "grad_norm": 0.17513076961040497, "loss": 0.4802294999361038, "time": 1.8148770332336426, "epoch": 302, "memory": 36824, "step": 94313}
{"lr": 0.0011759917903196482, "data_time": 0.002571296691894531, "grad_norm": 0.1699385702610016, "loss": 0.48028205931186674, "time": 1.8714402675628663, "epoch": 302, "memory": 36824, "step": 94413}
{"lr": 0.001175058893169836, "data_time": 0.003746485710144043, "grad_norm": 0.17592989057302474, "loss": 0.4792145937681198, "time": 1.815717887878418, "epoch": 302, "memory": 36824, "step": 94513}
{"lr": 0.001174003932227856, "data_time": 0.1088953971862793, "grad_norm": 0.1922255739569664, "loss": 0.48208723664283754, "time": 1.7887662887573241, "epoch": 303, "memory": 36824, "step": 94626}
{"lr": 0.0011730696436790542, "data_time": 0.0027657508850097655, "grad_norm": 0.19634197652339935, "loss": 0.482526832818985, "time": 1.880968737602234, "epoch": 303, "memory": 36824, "step": 94726}
{"lr": 0.0011721347044408486, "data_time": 0.003536677360534668, "grad_norm": 0.17562715113162994, "loss": 0.47973875105381014, "time": 1.7459250926971435, "epoch": 303, "memory": 36824, "step": 94826}
{"lr": 0.0011710774420770303, "data_time": 0.0025190114974975586, "grad_norm": 0.1791679009795189, "loss": 0.47971550524234774, "time": 1.7949323415756226, "epoch": 304, "memory": 36824, "step": 94939}
{"lr": 0.0011701411223075846, "data_time": 0.0026087522506713866, "grad_norm": 0.17656266391277314, "loss": 0.4793607771396637, "time": 1.8175758600234986, "epoch": 304, "memory": 36824, "step": 95039}
{"lr": 0.001169204156956514, "data_time": 0.0030922889709472656, "grad_norm": 0.16885052919387816, "loss": 0.4761143118143082, "time": 1.755135726928711, "epoch": 304, "memory": 36824, "step": 95139}
{"lr": 0.001168144611236557, "data_time": 0.1731001615524292, "grad_norm": 0.19430186003446578, "loss": 0.47693963050842286, "time": 1.80178701877594, "epoch": 305, "memory": 36824, "step": 95252}
{"lr": 0.0011672062762455877, "data_time": 0.0025810480117797853, "grad_norm": 0.1716848075389862, "loss": 0.4801531583070755, "time": 1.8121048212051392, "epoch": 305, "memory": 36824, "step": 95352}
{"lr": 0.0011662673007918074, "data_time": 0.0031377792358398436, "grad_norm": 0.1841959685087204, "loss": 0.4777863144874573, "time": 1.7926321029663086, "epoch": 305, "memory": 36824, "step": 95452}
{"lr": 0.0011652054898204224, "data_time": 0.1658832311630249, "grad_norm": 0.1945878878235817, "loss": 0.4841981381177902, "time": 1.805171799659729, "epoch": 306, "memory": 36824, "step": 95565}
{"lr": 0.0011642651556414887, "data_time": 0.0024630546569824217, "grad_norm": 0.18760682493448258, "loss": 0.48521836698055265, "time": 1.8750269651412963, "epoch": 306, "memory": 36824, "step": 95665}
{"lr": 0.0011633241861294975, "data_time": 0.003021836280822754, "grad_norm": 0.19303894490003587, "loss": 0.4807632237672806, "time": 1.8360344409942626, "epoch": 306, "memory": 36824, "step": 95765}
{"lr": 0.0011622601280501046, "data_time": 0.002749514579772949, "grad_norm": 0.18109329491853715, "loss": 0.4779942393302917, "time": 1.811230230331421, "epoch": 307, "memory": 36824, "step": 95878}
{"lr": 0.0011613178107509224, "data_time": 0.0026086091995239256, "grad_norm": 0.18748193979263306, "loss": 0.47967322170734406, "time": 1.7842957496643066, "epoch": 307, "memory": 36824, "step": 95978}
{"lr": 0.0011603748632592929, "data_time": 0.003371143341064453, "grad_norm": 0.17809336334466935, "loss": 0.47849270701408386, "time": 1.771667742729187, "epoch": 307, "memory": 36824, "step": 96078}
{"lr": 0.0011593085762537077, "data_time": 0.353592848777771, "grad_norm": 0.1727188155055046, "loss": 0.4798036992549896, "time": 1.8345861196517945, "epoch": 308, "memory": 36824, "step": 96191}
{"lr": 0.0011583642919358766, "data_time": 0.002855801582336426, "grad_norm": 0.170607590675354, "loss": 0.47932220101356504, "time": 1.778818702697754, "epoch": 308, "memory": 36824, "step": 96291}
{"lr": 0.0011574193825769788, "data_time": 0.0029030799865722655, "grad_norm": 0.1789554089307785, "loss": 0.4832329750061035, "time": 1.8454895257949828, "epoch": 308, "memory": 36824, "step": 96391}
{"lr": 0.0011563508848651085, "data_time": 0.2999574661254883, "grad_norm": 0.192084202170372, "loss": 0.4817927449941635, "time": 1.7848618507385254, "epoch": 309, "memory": 36824, "step": 96504}
{"lr": 0.001155404649663843, "data_time": 0.0027257204055786133, "grad_norm": 0.18309212177991868, "loss": 0.47955107092857363, "time": 1.8458518505096435, "epoch": 309, "memory": 36824, "step": 96604}
{"lr": 0.0011544577945835739, "data_time": 0.0040798187255859375, "grad_norm": 0.1700036644935608, "loss": 0.4816106230020523, "time": 1.7928221225738525, "epoch": 309, "memory": 36824, "step": 96704}
{"lr": 0.0011533871044230918, "data_time": 0.27278666496276854, "grad_norm": 0.1852756693959236, "loss": 0.4821300029754639, "time": 1.7709014177322389, "epoch": 310, "memory": 36824, "step": 96817}
{"lr": 0.001152438934506944, "data_time": 0.0027557849884033204, "grad_norm": 0.19027100503444672, "loss": 0.48176435232162473, "time": 1.8192455053329468, "epoch": 310, "memory": 36824, "step": 96917}
{"lr": 0.0011514901498844402, "data_time": 0.0030527353286743165, "grad_norm": 0.1663026511669159, "loss": 0.4870552122592926, "time": 1.7096453189849854, "epoch": 310, "memory": 36824, "step": 97017}
{"lr": 0.0011504172855704875, "data_time": 0.2511580228805542, "grad_norm": 0.18411695063114167, "loss": 0.47933852970600127, "time": 1.8489005804061889, "epoch": 311, "memory": 36824, "step": 97130}
{"lr": 0.0011494671971410614, "data_time": 0.002555561065673828, "grad_norm": 0.18565103113651277, "loss": 0.47878464460372927, "time": 1.7744481563568115, "epoch": 311, "memory": 36824, "step": 97230}
{"lr": 0.0011485164991884374, "data_time": 0.0028319597244262696, "grad_norm": 0.18839482665061952, "loss": 0.48121299147605895, "time": 1.8113676309585571, "epoch": 311, "memory": 36824, "step": 97330}
{"lr": 0.0011474414790533067, "data_time": 0.05375666618347168, "grad_norm": 0.19965709745883942, "loss": 0.482355871796608, "time": 1.811983847618103, "epoch": 312, "memory": 36824, "step": 97443}
{"lr": 0.0011464894883449932, "data_time": 0.0029717206954956053, "grad_norm": 0.1898716315627098, "loss": 0.48287994861602784, "time": 1.8086093664169312, "epoch": 312, "memory": 36824, "step": 97543}
{"lr": 0.0011455368933070535, "data_time": 0.003417205810546875, "grad_norm": 0.17599427253007888, "loss": 0.478746822476387, "time": 1.8623327016830444, "epoch": 312, "memory": 36824, "step": 97643}
{"lr": 0.001144459735719874, "data_time": 0.1716519594192505, "grad_norm": 0.19156099259853362, "loss": 0.4827893882989883, "time": 1.8079103231430054, "epoch": 313, "memory": 36824, "step": 97756}
{"lr": 0.0011435058589995693, "data_time": 0.0024576663970947267, "grad_norm": 0.18442015796899797, "loss": 0.486859655380249, "time": 1.764953064918518, "epoch": 313, "memory": 36824, "step": 97856}
{"lr": 0.001142551383153535, "data_time": 0.003902697563171387, "grad_norm": 0.19025631099939347, "loss": 0.4807032436132431, "time": 1.8008830785751342, "epoch": 313, "memory": 36824, "step": 97956}
{"lr": 0.0011414721065199551, "data_time": 0.20788919925689697, "grad_norm": 0.16411014944314956, "loss": 0.4819142907857895, "time": 1.8197219848632813, "epoch": 314, "memory": 36824, "step": 98069}
{"lr": 0.0011405163600867797, "data_time": 0.0027582406997680663, "grad_norm": 0.18664154708385466, "loss": 0.48102711141109467, "time": 1.7912940979003906, "epoch": 314, "memory": 36824, "step": 98169}
{"lr": 0.0011395600197420083, "data_time": 0.003687405586242676, "grad_norm": 0.1693679764866829, "loss": 0.48030913472175596, "time": 1.8366989612579345, "epoch": 314, "memory": 36824, "step": 98269}
{"lr": 0.00113847864250389, "data_time": 0.3978468894958496, "grad_norm": 0.18215218484401702, "loss": 0.48135427832603456, "time": 1.9360342025756836, "epoch": 315, "memory": 36824, "step": 98382}
{"lr": 0.0011375210426889126, "data_time": 0.0024981021881103514, "grad_norm": 0.18162746876478195, "loss": 0.47903733253479003, "time": 1.810151171684265, "epoch": 315, "memory": 36824, "step": 98482}
{"lr": 0.0011365628541866227, "data_time": 0.003067898750305176, "grad_norm": 0.19943075627088547, "loss": 0.4829543143510818, "time": 1.7779191732406616, "epoch": 315, "memory": 36824, "step": 98582}
{"lr": 0.0011354793948217174, "data_time": 0.08198685646057129, "grad_norm": 0.19523852169513703, "loss": 0.4803155541419983, "time": 1.7872535943984986, "epoch": 316, "memory": 36824, "step": 98695}
{"lr": 0.0011345199579876775, "data_time": 0.002484631538391113, "grad_norm": 0.18748174905776976, "loss": 0.48633005619049074, "time": 1.828378963470459, "epoch": 316, "memory": 36824, "step": 98795}
{"lr": 0.0011335599377006664, "data_time": 0.002806687355041504, "grad_norm": 0.191411854326725, "loss": 0.48237129151821134, "time": 1.7814130544662476, "epoch": 316, "memory": 36824, "step": 98895}
{"lr": 0.0011324744147223076, "data_time": 0.002479362487792969, "grad_norm": 0.195454503595829, "loss": 0.4795661360025406, "time": 1.7745830059051513, "epoch": 317, "memory": 36824, "step": 99008}
{"lr": 0.0011315131572633337, "data_time": 0.0025090694427490233, "grad_norm": 0.20071423053741455, "loss": 0.48386471569538114, "time": 1.7829958200454712, "epoch": 317, "memory": 36824, "step": 99108}
{"lr": 0.0011305513215956995, "data_time": 0.0033330678939819335, "grad_norm": 0.19956302940845488, "loss": 0.47948221266269686, "time": 1.8254135608673097, "epoch": 317, "memory": 36824, "step": 99208}
{"lr": 0.0011294637535524778, "data_time": 0.00251924991607666, "grad_norm": 0.18898600935935975, "loss": 0.4798003017902374, "time": 1.8181716442108153, "epoch": 318, "memory": 36824, "step": 99321}
{"lr": 0.0011285006918938078, "data_time": 0.002476930618286133, "grad_norm": 0.19035661071538926, "loss": 0.4800828963518143, "time": 1.8812837839126586, "epoch": 318, "memory": 36824, "step": 99421}
{"lr": 0.0011275370572806696, "data_time": 0.002777242660522461, "grad_norm": 0.17907389029860496, "loss": 0.4790008455514908, "time": 1.7522172212600708, "epoch": 318, "memory": 36824, "step": 99521}
{"lr": 0.0011264474627561225, "data_time": 0.31435747146606446, "grad_norm": 0.18341762125492095, "loss": 0.4789006143808365, "time": 1.782721471786499, "epoch": 319, "memory": 36824, "step": 99634}
{"lr": 0.001125482613353822, "data_time": 0.0025304555892944336, "grad_norm": 0.20729002952575684, "loss": 0.479562383890152, "time": 1.745964527130127, "epoch": 319, "memory": 36824, "step": 99734}
{"lr": 0.001124517196261034, "data_time": 0.002984023094177246, "grad_norm": 0.19769458174705506, "loss": 0.48323524594306944, "time": 1.818442940711975, "epoch": 319, "memory": 36824, "step": 99834}
{"lr": 0.001123425593873325, "data_time": 0.18428413867950438, "grad_norm": 0.188147434592247, "loss": 0.4799580335617065, "time": 1.8042377471923827, "epoch": 320, "memory": 36824, "step": 99947}
{"lr": 0.0011224589732140083, "data_time": 0.0023663997650146484, "grad_norm": 0.1768282137811184, "loss": 0.47847341001033783, "time": 1.7517722845077515, "epoch": 320, "memory": 36824, "step": 100047}
{"lr": 0.0011214917901378866, "data_time": 0.002712726593017578, "grad_norm": 0.19916243255138397, "loss": 0.4818460524082184, "time": 1.7542521476745605, "epoch": 320, "memory": 36824, "step": 100147}
{"lr": 0.0011203981985394903, "data_time": 0.15175073146820067, "grad_norm": 0.1831878423690796, "loss": 0.48363883793354034, "time": 1.7902194023132325, "epoch": 321, "memory": 36824, "step": 100260}
{"lr": 0.0011194298231400385, "data_time": 0.0024390220642089844, "grad_norm": 0.1968400940299034, "loss": 0.4816294461488724, "time": 1.8117614269256592, "epoch": 321, "memory": 36824, "step": 100360}
{"lr": 0.0011184608906070722, "data_time": 0.002997589111328125, "grad_norm": 0.20193433612585068, "loss": 0.47736978232860566, "time": 1.8090699672698975, "epoch": 321, "memory": 36824, "step": 100460}
{"lr": 0.0011173653284844516, "data_time": 0.002737140655517578, "grad_norm": 0.18784691542387008, "loss": 0.4855561971664429, "time": 1.8306393146514892, "epoch": 322, "memory": 36824, "step": 100573}
{"lr": 0.0011163952148917277, "data_time": 0.002419304847717285, "grad_norm": 0.17935833036899568, "loss": 0.48281086087226865, "time": 1.7848268747329712, "epoch": 322, "memory": 36824, "step": 100673}
{"lr": 0.0011154245494583027, "data_time": 0.004510664939880371, "grad_norm": 0.18990952670574188, "loss": 0.4792942851781845, "time": 1.7351841926574707, "epoch": 322, "memory": 36824, "step": 100773}
{"lr": 0.001114327035531591, "data_time": 0.03341310024261475, "grad_norm": 0.1791630804538727, "loss": 0.482655456662178, "time": 1.7892924547195435, "epoch": 323, "memory": 36824, "step": 100886}
{"lr": 0.0011133552003221596, "data_time": 0.0025831937789916994, "grad_norm": 0.17524865120649338, "loss": 0.4798093169927597, "time": 1.8112109661102296, "epoch": 323, "memory": 36824, "step": 100986}
{"lr": 0.001112382818574268, "data_time": 0.0030437469482421874, "grad_norm": 0.16436986178159713, "loss": 0.4789182275533676, "time": 1.7602720975875854, "epoch": 323, "memory": 36824, "step": 101086}
{"lr": 0.0011112833715969498, "data_time": 0.0029433727264404296, "grad_norm": 0.19658688753843306, "loss": 0.48560039699077606, "time": 1.7766066551208497, "epoch": 324, "memory": 36824, "step": 101199}
{"lr": 0.0011103098313767939, "data_time": 0.0027152538299560548, "grad_norm": 0.1750485807657242, "loss": 0.4833013594150543, "time": 1.7362659692764282, "epoch": 324, "memory": 36824, "step": 101299}
{"lr": 0.001109335749929758, "data_time": 0.00292050838470459, "grad_norm": 0.18222004920244217, "loss": 0.47938556373119356, "time": 1.762886118888855, "epoch": 324, "memory": 36824, "step": 101399}
{"lr": 0.0011082343886883493, "data_time": 0.18768155574798584, "grad_norm": 0.17387241423130034, "loss": 0.48131464421749115, "time": 1.7773499727249145, "epoch": 325, "memory": 36824, "step": 101512}
{"lr": 0.0011072591600925841, "data_time": 0.002725982666015625, "grad_norm": 0.18188545256853103, "loss": 0.48062221705913544, "time": 1.7809409856796266, "epoch": 325, "memory": 36824, "step": 101612}
{"lr": 0.0011062833955907683, "data_time": 0.002958345413208008, "grad_norm": 0.1941744163632393, "loss": 0.47824908792972565, "time": 1.7780309200286866, "epoch": 325, "memory": 36824, "step": 101712}
{"lr": 0.0011051801389044943, "data_time": 0.22356801033020018, "grad_norm": 0.20444382578134537, "loss": 0.4748668193817139, "time": 1.8426388502120972, "epoch": 326, "memory": 36824, "step": 101825}
{"lr": 0.001104203238597088, "data_time": 0.002404475212097168, "grad_norm": 0.1884022668004036, "loss": 0.4775002658367157, "time": 1.81434907913208, "epoch": 326, "memory": 36824, "step": 101925}
{"lr": 0.001103225807713614, "data_time": 0.0032506704330444334, "grad_norm": 0.2053118832409382, "loss": 0.4771193891763687, "time": 1.8547411680221557, "epoch": 326, "memory": 36824, "step": 102025}
{"lr": 0.0011021206744340872, "data_time": 0.14258894920349122, "grad_norm": 0.199728362262249, "loss": 0.4788341522216797, "time": 1.823665237426758, "epoch": 327, "memory": 36824, "step": 102138}
{"lr": 0.0011011421191075678, "data_time": 0.00287628173828125, "grad_norm": 0.1825242668390274, "loss": 0.476729154586792, "time": 1.7868314981460571, "epoch": 327, "memory": 36824, "step": 102238}
{"lr": 0.0011001630385440333, "data_time": 0.0036439418792724608, "grad_norm": 0.19337748736143112, "loss": 0.4816011399030685, "time": 1.8193151235580445, "epoch": 327, "memory": 36824, "step": 102338}
{"lr": 0.0010990560475549313, "data_time": 0.0024164915084838867, "grad_norm": 0.17308135479688644, "loss": 0.4828196883201599, "time": 1.8250930786132813, "epoch": 328, "memory": 36824, "step": 102451}
{"lr": 0.0010980758539301109, "data_time": 0.0026111841201782227, "grad_norm": 0.18564238101243974, "loss": 0.47634656727313995, "time": 1.7379746198654176, "epoch": 328, "memory": 36824, "step": 102551}
{"lr": 0.001097095140416301, "data_time": 0.0029496669769287108, "grad_norm": 0.20134216248989106, "loss": 0.480329430103302, "time": 1.789938712120056, "epoch": 328, "memory": 36824, "step": 102651}
{"lr": 0.0010959863106330464, "data_time": 0.0025880813598632814, "grad_norm": 0.17659547328948974, "loss": 0.48081850111484525, "time": 1.8525658130645752, "epoch": 329, "memory": 36824, "step": 102764}
{"lr": 0.001095004495458729, "data_time": 0.0024748325347900392, "grad_norm": 0.2175094246864319, "loss": 0.48248598873615267, "time": 1.733499789237976, "epoch": 329, "memory": 36824, "step": 102864}
{"lr": 0.0010940221657523323, "data_time": 0.0029796361923217773, "grad_norm": 0.1840605691075325, "loss": 0.4775920480489731, "time": 1.7532792806625366, "epoch": 329, "memory": 36824, "step": 102964}
{"lr": 0.0010929115161217662, "data_time": 0.0027782678604125976, "grad_norm": 0.183203886449337, "loss": 0.47678242027759554, "time": 1.7689924716949463, "epoch": 330, "memory": 36824, "step": 103077}
{"lr": 0.0010919280961744645, "data_time": 0.0024704694747924804, "grad_norm": 0.19443391412496566, "loss": 0.4798226445913315, "time": 1.792885947227478, "epoch": 330, "memory": 36824, "step": 103177}
{"lr": 0.001090944167060786, "data_time": 0.004143261909484863, "grad_norm": 0.19313082098960876, "loss": 0.48551250994205475, "time": 1.889257788658142, "epoch": 330, "memory": 36824, "step": 103277}
{"lr": 0.0010898317165608479, "data_time": 0.12248189449310302, "grad_norm": 0.17991358935832977, "loss": 0.4787906438112259, "time": 1.7591230630874635, "epoch": 331, "memory": 36824, "step": 103390}
{"lr": 0.0010888467086444947, "data_time": 0.002508401870727539, "grad_norm": 0.1848162293434143, "loss": 0.47731091976165774, "time": 1.813077974319458, "epoch": 331, "memory": 36824, "step": 103490}
{"lr": 0.001087861196936166, "data_time": 0.003497314453125, "grad_norm": 0.20094237476587296, "loss": 0.4782728135585785, "time": 1.783406925201416, "epoch": 331, "memory": 36824, "step": 103590}
{"lr": 0.001086746964575562, "data_time": 0.002634477615356445, "grad_norm": 0.18549708425998687, "loss": 0.4810695439577103, "time": 1.819430160522461, "epoch": 332, "memory": 36824, "step": 103703}
{"lr": 0.0010857603855212261, "data_time": 0.002457141876220703, "grad_norm": 0.21535895019769669, "loss": 0.4828482210636139, "time": 1.826407790184021, "epoch": 332, "memory": 36824, "step": 103803}
{"lr": 0.001084773308057925, "data_time": 0.0029082298278808594, "grad_norm": 0.17845143526792526, "loss": 0.4755959868431091, "time": 1.7797899961471557, "epoch": 332, "memory": 36824, "step": 103903}
{"lr": 0.001083657312875815, "data_time": 0.025472688674926757, "grad_norm": 0.20392736047506332, "loss": 0.480867475271225, "time": 1.8271871089935303, "epoch": 333, "memory": 36824, "step": 104016}
{"lr": 0.0010826691795414111, "data_time": 0.0029833316802978516, "grad_norm": 0.20744574815034866, "loss": 0.4816928952932358, "time": 1.8581493139266967, "epoch": 333, "memory": 36824, "step": 104116}
{"lr": 0.0010816805531895682, "data_time": 0.0028022289276123046, "grad_norm": 0.18769733756780624, "loss": 0.4839559495449066, "time": 1.8362195014953613, "epoch": 333, "memory": 36824, "step": 104216}
{"lr": 0.0010805628142552305, "data_time": 0.24368486404418946, "grad_norm": 0.1991789996623993, "loss": 0.4762642323970795, "time": 1.7483130931854247, "epoch": 334, "memory": 36824, "step": 104329}
{"lr": 0.0010795731435252276, "data_time": 0.002651667594909668, "grad_norm": 0.19394033402204514, "loss": 0.48331486284732816, "time": 1.8367931604385377, "epoch": 334, "memory": 36824, "step": 104429}
{"lr": 0.0010785829851777387, "data_time": 0.002952432632446289, "grad_norm": 0.19916898757219315, "loss": 0.4791864603757858, "time": 1.7198251724243163, "epoch": 334, "memory": 36824, "step": 104529}
{"lr": 0.0010774635215902463, "data_time": 0.06657149791717529, "grad_norm": 0.19396315068006514, "loss": 0.4856929361820221, "time": 1.7892242908477782, "epoch": 335, "memory": 36824, "step": 104642}
{"lr": 0.0010764723303753906, "data_time": 0.00249326229095459, "grad_norm": 0.18025118261575698, "loss": 0.48022483885288236, "time": 1.8922698259353639, "epoch": 335, "memory": 36824, "step": 104742}
{"lr": 0.0010754806569513297, "data_time": 0.003532743453979492, "grad_norm": 0.19540956616401672, "loss": 0.4776217848062515, "time": 1.8898945808410645, "epoch": 335, "memory": 36824, "step": 104842}
{"lr": 0.0010743594878392273, "data_time": 0.3609552145004272, "grad_norm": 0.21388164907693863, "loss": 0.48183313608169553, "time": 1.8344010829925537, "epoch": 336, "memory": 36824, "step": 104955}
{"lr": 0.0010733667930762402, "data_time": 0.0025481939315795898, "grad_norm": 0.20024387836456298, "loss": 0.4833446443080902, "time": 1.848227572441101, "epoch": 336, "memory": 36824, "step": 105055}
{"lr": 0.001072373621520571, "data_time": 0.0028438568115234375, "grad_norm": 0.21971700787544252, "loss": 0.4799072206020355, "time": 1.8186779022216797, "epoch": 336, "memory": 36824, "step": 105155}
{"lr": 0.0010712507660415448, "data_time": 0.002493143081665039, "grad_norm": 0.19723085910081864, "loss": 0.48327529728412627, "time": 1.8237116575241088, "epoch": 337, "memory": 36824, "step": 105268}
{"lr": 0.00107025658469284, "data_time": 0.0024125099182128905, "grad_norm": 0.19204089194536209, "loss": 0.4805057555437088, "time": 1.7818014860153197, "epoch": 337, "memory": 36824, "step": 105368}
{"lr": 0.0010692619319761238, "data_time": 0.0031365871429443358, "grad_norm": 0.20194046795368195, "loss": 0.47584612369537355, "time": 1.764585590362549, "epoch": 337, "memory": 36824, "step": 105468}
{"lr": 0.0010681374093166785, "data_time": 0.14958927631378174, "grad_norm": 0.1839817523956299, "loss": 0.47890423238277435, "time": 1.8271512508392334, "epoch": 338, "memory": 36824, "step": 105581}
{"lr": 0.001067141758370072, "data_time": 0.0028407096862792967, "grad_norm": 0.20388736724853515, "loss": 0.47920921444892883, "time": 1.7912118434906006, "epoch": 338, "memory": 36824, "step": 105681}
{"lr": 0.0010661456414881793, "data_time": 0.0030256271362304687, "grad_norm": 0.1974166914820671, "loss": 0.478562992811203, "time": 1.7823668241500854, "epoch": 338, "memory": 36824, "step": 105781}
{"lr": 0.001065019470863301, "data_time": 0.17311108112335205, "grad_norm": 0.189491868019104, "loss": 0.4815263390541077, "time": 1.8233642816543578, "epoch": 339, "memory": 36824, "step": 105894}
{"lr": 0.0010640223673317227, "data_time": 0.0024524688720703124, "grad_norm": 0.19192690700292586, "loss": 0.48327485620975497, "time": 1.7795610427856445, "epoch": 339, "memory": 36824, "step": 105994}
{"lr": 0.0010630248033055422, "data_time": 0.002688908576965332, "grad_norm": 0.1838735520839691, "loss": 0.47841736674308777, "time": 1.7911830186843871, "epoch": 339, "memory": 36824, "step": 106094}
{"lr": 0.0010618970039583827, "data_time": 0.07824957370758057, "grad_norm": 0.20631297081708908, "loss": 0.479437392950058, "time": 1.8348208904266357, "epoch": 340, "memory": 36824, "step": 106207}
{"lr": 0.0010608984648795786, "data_time": 0.0026921510696411135, "grad_norm": 0.20371512621641158, "loss": 0.48272409439086916, "time": 1.8073471546173097, "epoch": 340, "memory": 36824, "step": 106307}
{"lr": 0.0010598994707547273, "data_time": 0.0032831430435180664, "grad_norm": 0.19757152646780013, "loss": 0.47865080535411836, "time": 1.8238849878311156, "epoch": 340, "memory": 36824, "step": 106407}
{"lr": 0.001058770061956264, "data_time": 0.05881984233856201, "grad_norm": 0.19901015907526015, "loss": 0.4805714666843414, "time": 1.851838803291321, "epoch": 341, "memory": 36824, "step": 106520}
{"lr": 0.0010577701043925118, "data_time": 0.002573108673095703, "grad_norm": 0.1903382882475853, "loss": 0.48007636666297915, "time": 1.8111811399459838, "epoch": 341, "memory": 36824, "step": 106620}
{"lr": 0.0010567696972390425, "data_time": 0.002772951126098633, "grad_norm": 0.17867380380630493, "loss": 0.480381840467453, "time": 1.7728437662124634, "epoch": 341, "memory": 36824, "step": 106720}
{"lr": 0.0010556386982877555, "data_time": 0.3934455156326294, "grad_norm": 0.17913425117731094, "loss": 0.4825063914060593, "time": 1.7837868452072143, "epoch": 342, "memory": 36824, "step": 106833}
{"lr": 0.0010546373393255735, "data_time": 0.027309155464172362, "grad_norm": 0.19810437262058259, "loss": 0.48129904866218565, "time": 1.8346424341201781, "epoch": 342, "memory": 36824, "step": 106933}
{"lr": 0.0010536355362376864, "data_time": 0.002913045883178711, "grad_norm": 0.18477419018745422, "loss": 0.47761958837509155, "time": 1.8154741525650024, "epoch": 342, "memory": 36824, "step": 107033}
{"lr": 0.0010525029664592276, "data_time": 0.0025206565856933593, "grad_norm": 0.1819385975599289, "loss": 0.4783410459756851, "time": 1.7863487482070923, "epoch": 343, "memory": 36824, "step": 107146}
{"lr": 0.0010515002232090748, "data_time": 0.0024689674377441407, "grad_norm": 0.2067517563700676, "loss": 0.47525611221790315, "time": 1.8898253679275512, "epoch": 343, "memory": 36824, "step": 107246}
{"lr": 0.0010504970413048188, "data_time": 0.0034995794296264647, "grad_norm": 0.19470818787813188, "loss": 0.48460336625576017, "time": 1.770344352722168, "epoch": 343, "memory": 36824, "step": 107346}
{"lr": 0.0010493629200516788, "data_time": 0.03283681869506836, "grad_norm": 0.18373986333608627, "loss": 0.48035026490688326, "time": 1.8631816148757934, "epoch": 344, "memory": 36824, "step": 107459}
{"lr": 0.0010483588096476726, "data_time": 0.0027162313461303713, "grad_norm": 0.19459937065839766, "loss": 0.47657279670238495, "time": 1.864941382408142, "epoch": 344, "memory": 36824, "step": 107559}
{"lr": 0.001047354266068659, "data_time": 0.0029093027114868164, "grad_norm": 0.19001485109329225, "loss": 0.47895256876945497, "time": 1.7868958950042724, "epoch": 344, "memory": 36824, "step": 107659}
{"lr": 0.0010462186127198445, "data_time": 0.0829172134399414, "grad_norm": 0.18318391740322112, "loss": 0.47998263537883756, "time": 1.8174281358718871, "epoch": 345, "memory": 36824, "step": 107772}
{"lr": 0.0010452131523194606, "data_time": 0.0024794340133666992, "grad_norm": 0.2183821588754654, "loss": 0.484317809343338, "time": 1.7982920408248901, "epoch": 345, "memory": 36824, "step": 107872}
{"lr": 0.0010442072642305664, "data_time": 0.0030303716659545897, "grad_norm": 0.3032063990831375, "loss": 0.4790930926799774, "time": 1.811744260787964, "epoch": 345, "memory": 36824, "step": 107972}
{"lr": 0.0010430700981912588, "data_time": 0.09916517734527588, "grad_norm": 0.20006529688835145, "loss": 0.47657846808433535, "time": 1.8371742486953735, "epoch": 346, "memory": 36824, "step": 108085}
{"lr": 0.001042063304975042, "data_time": 0.0026159763336181642, "grad_norm": 0.19685606211423873, "loss": 0.4804870456457138, "time": 1.8645768880844116, "epoch": 346, "memory": 36824, "step": 108185}
{"lr": 0.0010410560895641207, "data_time": 0.004116249084472656, "grad_norm": 0.2025374338030815, "loss": 0.4786626785993576, "time": 1.8324404239654541, "epoch": 346, "memory": 36824, "step": 108285}
{"lr": 0.0010399174302653492, "data_time": 0.022745466232299803, "grad_norm": 0.19385137856006623, "loss": 0.4809082508087158, "time": 1.911377763748169, "epoch": 347, "memory": 36824, "step": 108398}
{"lr": 0.0010389093214366164, "data_time": 0.0023835420608520506, "grad_norm": 0.2070680618286133, "loss": 0.48261103928089144, "time": 1.7697869062423706, "epoch": 347, "memory": 36824, "step": 108498}
{"lr": 0.0010379007959141998, "data_time": 0.0034202814102172853, "grad_norm": 0.22552781105041503, "loss": 0.4782377541065216, "time": 1.808321475982666, "epoch": 347, "memory": 36824, "step": 108598}
{"lr": 0.0010367606628125096, "data_time": 0.23968448638916015, "grad_norm": 0.19657166600227355, "loss": 0.4844703316688538, "time": 1.8047464609146118, "epoch": 348, "memory": 36824, "step": 108711}
{"lr": 0.0010357512555970593, "data_time": 0.06320819854736329, "grad_norm": 0.18627900779247283, "loss": 0.48118863105773924, "time": 1.8104024171829223, "epoch": 348, "memory": 36824, "step": 108811}
{"lr": 0.0010347414371960654, "data_time": 0.22937355041503907, "grad_norm": 0.20603908747434616, "loss": 0.480094188451767, "time": 1.857655143737793, "epoch": 348, "memory": 36824, "step": 108911}
{"lr": 0.0010335998497731868, "data_time": 0.10735762119293213, "grad_norm": 0.20068449825048446, "loss": 0.47486173212528227, "time": 1.8463757753372192, "epoch": 349, "memory": 36824, "step": 109024}
{"lr": 0.001032589161419005, "data_time": 0.0027150869369506835, "grad_norm": 0.19104156345129014, "loss": 0.47878185510635374, "time": 1.811758041381836, "epoch": 349, "memory": 36824, "step": 109124}
{"lr": 0.0010315780673944455, "data_time": 0.0027765989303588866, "grad_norm": 0.2005017951130867, "loss": 0.4781287759542465, "time": 1.8471292734146119, "epoch": 349, "memory": 36824, "step": 109224}
{"lr": 0.0010304350451569573, "data_time": 0.0608691930770874, "grad_norm": 0.21609181612730027, "loss": 0.4768299043178558, "time": 1.8057791233062743, "epoch": 350, "memory": 36824, "step": 109337}
{"lr": 0.001029423092933917, "data_time": 0.0027458906173706056, "grad_norm": 0.19345388263463975, "loss": 0.4821341305971146, "time": 1.8712092161178588, "epoch": 350, "memory": 36824, "step": 109437}
{"lr": 0.0010284107405625978, "data_time": 0.002958869934082031, "grad_norm": 0.20637684613466262, "loss": 0.4764446526765823, "time": 1.7851858615875245, "epoch": 350, "memory": 36824, "step": 109537}
{"lr": 0.0010272663030415948, "data_time": 0.09271373748779296, "grad_norm": 0.19065508395433425, "loss": 0.48054171204566953, "time": 1.806248092651367, "epoch": 351, "memory": 36824, "step": 109650}
{"lr": 0.0010262531042411697, "data_time": 0.0026148319244384765, "grad_norm": 0.19827525168657303, "loss": 0.47781493067741393, "time": 1.8763972520828247, "epoch": 351, "memory": 36824, "step": 109750}
{"lr": 0.0010252395108213996, "data_time": 0.002813911437988281, "grad_norm": 0.19780030697584153, "loss": 0.47929815351963045, "time": 1.7612928390502929, "epoch": 351, "memory": 36824, "step": 109850}
{"lr": 0.0010240936775721642, "data_time": 0.2044576644897461, "grad_norm": 0.1948131263256073, "loss": 0.481237456202507, "time": 1.8141350269317627, "epoch": 352, "memory": 36824, "step": 109963}
{"lr": 0.0010230792495071253, "data_time": 0.0028949737548828124, "grad_norm": 0.23715819418430328, "loss": 0.4773128479719162, "time": 1.832980251312256, "epoch": 352, "memory": 36824, "step": 110063}
{"lr": 0.0010220644323584221, "data_time": 0.0028172969818115235, "grad_norm": 0.20224223732948304, "loss": 0.47839112877845763, "time": 1.8470874071121215, "epoch": 352, "memory": 36824, "step": 110163}
{"lr": 0.0010209172229600808, "data_time": 0.16465530395507813, "grad_norm": 0.19395869076251984, "loss": 0.4801880568265915, "time": 1.9074843406677247, "epoch": 353, "memory": 36824, "step": 110276}
{"lr": 0.0010199015829642048, "data_time": 0.0024032354354858398, "grad_norm": 0.20677786320447922, "loss": 0.4826795905828476, "time": 1.8537420272827148, "epoch": 353, "memory": 36824, "step": 110376}
{"lr": 0.001018885559426992, "data_time": 0.002750992774963379, "grad_norm": 0.19620489031076432, "loss": 0.4790633350610733, "time": 1.8059757709503175, "epoch": 353, "memory": 36824, "step": 110476}
{"lr": 0.0010177369934821895, "data_time": 0.039995646476745604, "grad_norm": 0.19944582432508468, "loss": 0.48016331493854525, "time": 1.7915399074554443, "epoch": 354, "memory": 36824, "step": 110589}
{"lr": 0.0010167201589099614, "data_time": 0.0028510093688964844, "grad_norm": 0.19988194555044175, "loss": 0.4780103474855423, "time": 1.9099622249603272, "epoch": 354, "memory": 36824, "step": 110689}
{"lr": 0.0010157029463452763, "data_time": 0.0033160686492919923, "grad_norm": 0.2031185120344162, "loss": 0.48252038955688475, "time": 1.8258577823638915, "epoch": 354, "memory": 36824, "step": 110789}
{"lr": 0.0010145530434798357, "data_time": 0.003012943267822266, "grad_norm": 0.18050954192876817, "loss": 0.47616427540779116, "time": 1.8514822483062745, "epoch": 355, "memory": 36824, "step": 110902}
{"lr": 0.0010135350317061521, "data_time": 0.0028219699859619142, "grad_norm": 0.24765126407146454, "loss": 0.48008025288581846, "time": 1.8248719930648805, "epoch": 355, "memory": 36824, "step": 111002}
{"lr": 0.0010125166474953518, "data_time": 0.003380274772644043, "grad_norm": 0.22086791694164276, "loss": 0.48262921869754793, "time": 1.7770170450210572, "epoch": 355, "memory": 36824, "step": 111102}
{"lr": 0.001011365427357939, "data_time": 0.11103825569152832, "grad_norm": 0.20258521884679795, "loss": 0.4763501167297363, "time": 1.8185975313186646, "epoch": 356, "memory": 36824, "step": 111215}
{"lr": 0.001010346255777814, "data_time": 0.0027335166931152345, "grad_norm": 0.20695033222436904, "loss": 0.4797081142663956, "time": 1.8151351451873778, "epoch": 356, "memory": 36824, "step": 111315}
{"lr": 0.001009326717322275, "data_time": 0.003527712821960449, "grad_norm": 0.2248741552233696, "loss": 0.4779272019863129, "time": 1.8220667839050293, "epoch": 356, "memory": 36824, "step": 111415}
{"lr": 0.001008174199584068, "data_time": 0.03295724391937256, "grad_norm": 0.24121529906988143, "loss": 0.4786725163459778, "time": 1.8020812749862671, "epoch": 357, "memory": 36824, "step": 111528}
{"lr": 0.0010071538856123313, "data_time": 0.0024808645248413086, "grad_norm": 0.21034673303365709, "loss": 0.481206139922142, "time": 1.9897016763687134, "epoch": 357, "memory": 36824, "step": 111628}
{"lr": 0.0010061332103331528, "data_time": 0.003246617317199707, "grad_norm": 0.2140180304646492, "loss": 0.4751867204904556, "time": 1.7802065849304198, "epoch": 357, "memory": 36824, "step": 111728}
{"lr": 0.0010049794146874977, "data_time": 0.3234552383422852, "grad_norm": 0.20687088072299958, "loss": 0.47768384516239165, "time": 1.8180880069732666, "epoch": 358, "memory": 36824, "step": 111841}
{"lr": 0.0010039579757584987, "data_time": 0.002565336227416992, "grad_norm": 0.20742193907499312, "loss": 0.4785638302564621, "time": 1.8291305303573608, "epoch": 358, "memory": 36824, "step": 111941}
{"lr": 0.0010029361810962052, "data_time": 0.0029317378997802735, "grad_norm": 0.20662544518709183, "loss": 0.4769886016845703, "time": 1.7937784671783448, "epoch": 358, "memory": 36824, "step": 112041}
{"lr": 0.0010017811272582872, "data_time": 0.002653646469116211, "grad_norm": 0.20613109767436982, "loss": 0.4784657210111618, "time": 1.811898398399353, "epoch": 359, "memory": 36824, "step": 112154}
{"lr": 0.0010007585808255999, "data_time": 0.0023856163024902344, "grad_norm": 0.21501532346010208, "loss": 0.4732423067092896, "time": 2.1755763053894044, "epoch": 359, "memory": 36824, "step": 112254}
{"lr": 0.0009997356842398426, "data_time": 0.0034937143325805666, "grad_norm": 0.2188422352075577, "loss": 0.47276746928691865, "time": 1.8051153659820556, "epoch": 359, "memory": 36824, "step": 112354}
{"lr": 0.0009985793919463444, "data_time": 0.34885320663452146, "grad_norm": 0.2349689096212387, "loss": 0.47601754069328306, "time": 1.827273678779602, "epoch": 360, "memory": 36824, "step": 112467}
{"lr": 0.0009975557554824676, "data_time": 0.002501225471496582, "grad_norm": 0.20863329619169235, "loss": 0.4768527090549469, "time": 1.8241635084152221, "epoch": 360, "memory": 36824, "step": 112567}
{"lr": 0.0009965317744517253, "data_time": 0.0033733129501342775, "grad_norm": 0.21624999791383742, "loss": 0.48038102984428405, "time": 1.8028173685073852, "epoch": 360, "memory": 36824, "step": 112667}
{"lr": 0.0009953742634604913, "data_time": 0.002617168426513672, "grad_norm": 0.20432577580213546, "loss": 0.48149007856845855, "time": 1.8053994178771973, "epoch": 361, "memory": 36824, "step": 112780}
{"lr": 0.000994349554456546, "data_time": 0.003041195869445801, "grad_norm": 0.20047164261341094, "loss": 0.48105118572711947, "time": 2.1795576095581053, "epoch": 361, "memory": 36824, "step": 112880}
{"lr": 0.0009933245064778318, "data_time": 0.0037116289138793947, "grad_norm": 0.19738101363182067, "loss": 0.4764262199401855, "time": 1.7889044046401978, "epoch": 361, "memory": 36824, "step": 112980}
{"lr": 0.0009921657965675324, "data_time": 0.1468461275100708, "grad_norm": 0.19692746847867965, "loss": 0.48405260443687437, "time": 1.8548696041107178, "epoch": 362, "memory": 36824, "step": 113093}
{"lr": 0.0009911400325329674, "data_time": 0.0026552677154541016, "grad_norm": 0.19969021677970886, "loss": 0.4791125297546387, "time": 1.82805335521698, "epoch": 362, "memory": 36824, "step": 113193}
{"lr": 0.0009901139351215227, "data_time": 0.003789854049682617, "grad_norm": 0.20894947350025178, "loss": 0.4780891567468643, "time": 1.884396767616272, "epoch": 362, "memory": 36824, "step": 113293}
{"lr": 0.0009889540460913126, "data_time": 0.1989518404006958, "grad_norm": 0.19406476020812988, "loss": 0.4802081882953644, "time": 1.8531024217605592, "epoch": 363, "memory": 36824, "step": 113406}
{"lr": 0.000987927244553608, "data_time": 0.002534151077270508, "grad_norm": 0.21617647856473923, "loss": 0.4784320890903473, "time": 1.9740056037902831, "epoch": 363, "memory": 36824, "step": 113506}
{"lr": 0.0009869001152426062, "data_time": 0.0038938999176025392, "grad_norm": 0.20032401531934738, "loss": 0.48294604420661924, "time": 2.1039042472839355, "epoch": 363, "memory": 36824, "step": 113606}
{"lr": 0.0009857390669117888, "data_time": 0.002377009391784668, "grad_norm": 0.20103194415569306, "loss": 0.47997662127017976, "time": 1.8383896112442017, "epoch": 364, "memory": 36824, "step": 113719}
{"lr": 0.000984711245416147, "data_time": 0.0025903940200805663, "grad_norm": 0.23255195915699006, "loss": 0.4815374821424484, "time": 1.8100932359695434, "epoch": 364, "memory": 36824, "step": 113819}
{"lr": 0.000983683101756394, "data_time": 0.003069901466369629, "grad_norm": 0.2093813806772232, "loss": 0.48224442899227143, "time": 1.781031608581543, "epoch": 364, "memory": 36824, "step": 113919}
{"lr": 0.000982520913964083, "data_time": 0.07210571765899658, "grad_norm": 0.20750218331813813, "loss": 0.4753704160451889, "time": 1.8154533624649047, "epoch": 365, "memory": 36824, "step": 114032}
{"lr": 0.0009814920900731378, "data_time": 0.002948427200317383, "grad_norm": 0.20263583809137345, "loss": 0.48164507150650027, "time": 1.8961746215820312, "epoch": 365, "memory": 36824, "step": 114132}
{"lr": 0.0009804629496327703, "data_time": 0.0029224157333374023, "grad_norm": 0.1897310808300972, "loss": 0.48039394319057466, "time": 1.8323723316192626, "epoch": 365, "memory": 36824, "step": 114232}
{"lr": 0.0009792996422375477, "data_time": 0.29313061237335203, "grad_norm": 0.21184461414813996, "loss": 0.47919191122055055, "time": 1.7990751266479492, "epoch": 366, "memory": 36824, "step": 114345}
{"lr": 0.000978269833531063, "data_time": 0.0032529592514038085, "grad_norm": 0.2009409934282303, "loss": 0.47697209417819975, "time": 1.7774668693542481, "epoch": 366, "memory": 36824, "step": 114445}
{"lr": 0.000977239713895254, "data_time": 0.002858281135559082, "grad_norm": 0.20433245599269867, "loss": 0.4772475153207779, "time": 1.8444108486175537, "epoch": 366, "memory": 36824, "step": 114545}
{"lr": 0.0009760753067748332, "data_time": 0.07613072395324708, "grad_norm": 0.21131302267313004, "loss": 0.477936190366745, "time": 1.788827133178711, "epoch": 367, "memory": 36824, "step": 114658}
{"lr": 0.0009750445308493964, "data_time": 0.010352039337158203, "grad_norm": 0.20211615711450576, "loss": 0.48256802260875703, "time": 2.1088147163391113, "epoch": 367, "memory": 36824, "step": 114758}
{"lr": 0.0009740134496200476, "data_time": 0.0035794734954833984, "grad_norm": 0.24370242059230804, "loss": 0.4844677925109863, "time": 1.8085033655166627, "epoch": 367, "memory": 36824, "step": 114858}
{"lr": 0.0009728479626709337, "data_time": 0.002419304847717285, "grad_norm": 0.21872652024030687, "loss": 0.48440442979335785, "time": 1.8400516033172607, "epoch": 368, "memory": 36824, "step": 114971}
{"lr": 0.0009718162371396629, "data_time": 0.0025160789489746095, "grad_norm": 0.20588043332099915, "loss": 0.47565790116786955, "time": 1.7430715322494508, "epoch": 368, "memory": 36824, "step": 115071}
{"lr": 0.0009707842119351081, "data_time": 0.002995729446411133, "grad_norm": 0.2237464115023613, "loss": 0.4773965299129486, "time": 1.7838446378707886, "epoch": 368, "memory": 36824, "step": 115171}
{"lr": 0.0009696176650722569, "data_time": 0.08213088512420655, "grad_norm": 0.20342991054058074, "loss": 0.48116073608398435, "time": 1.8181955337524414, "epoch": 369, "memory": 36824, "step": 115284}
{"lr": 0.0009685850075644933, "data_time": 0.003230643272399902, "grad_norm": 0.21301790326833725, "loss": 0.4804782301187515, "time": 2.115739631652832, "epoch": 369, "memory": 36824, "step": 115384}
{"lr": 0.0009675520560191958, "data_time": 0.0036934375762939452, "grad_norm": 0.19716049283742904, "loss": 0.4796850383281708, "time": 1.7731683731079102, "epoch": 369, "memory": 36824, "step": 115484}
{"lr": 0.0009663844691756753, "data_time": 0.2765392780303955, "grad_norm": 0.2011558935046196, "loss": 0.4784715861082077, "time": 1.7617913484573364, "epoch": 370, "memory": 36824, "step": 115597}
{"lr": 0.0009653508973366877, "data_time": 0.0030204057693481445, "grad_norm": 0.21079792231321334, "loss": 0.47963271737098695, "time": 1.747318410873413, "epoch": 370, "memory": 36824, "step": 115697}
{"lr": 0.0009643170371009377, "data_time": 0.0032996416091918947, "grad_norm": 0.2099731147289276, "loss": 0.4752771556377411, "time": 1.8727036237716674, "epoch": 370, "memory": 36824, "step": 115797}
{"lr": 0.0009631484302275843, "data_time": 0.12114486694335938, "grad_norm": 0.21694259345531464, "loss": 0.47940964102745054, "time": 1.7987212419509888, "epoch": 371, "memory": 36824, "step": 115910}
{"lr": 0.0009621139617182624, "data_time": 0.0025139570236206053, "grad_norm": 0.21691120266914368, "loss": 0.4800281226634979, "time": 2.0561030864715577, "epoch": 371, "memory": 36824, "step": 116010}
{"lr": 0.0009610792104578816, "data_time": 0.0028792858123779298, "grad_norm": 0.19872231483459474, "loss": 0.48143393397331236, "time": 1.9457457542419434, "epoch": 371, "memory": 36824, "step": 116110}
{"lr": 0.0009599096035229642, "data_time": 0.0024698495864868162, "grad_norm": 0.19896496683359147, "loss": 0.4807865768671036, "time": 1.7736723661422729, "epoch": 372, "memory": 36824, "step": 116223}
{"lr": 0.0009588742560195218, "data_time": 0.002589249610900879, "grad_norm": 0.20343417823314666, "loss": 0.47668608725070954, "time": 1.7637780904769897, "epoch": 372, "memory": 36824, "step": 116323}
{"lr": 0.0009578386314155517, "data_time": 0.0027657270431518553, "grad_norm": 0.20377449244260787, "loss": 0.4795164704322815, "time": 1.849577260017395, "epoch": 372, "memory": 36824, "step": 116423}
{"lr": 0.0009566680444044249, "data_time": 0.0028985261917114256, "grad_norm": 0.19976505935192107, "loss": 0.48151050209999086, "time": 1.7875927925109862, "epoch": 373, "memory": 36824, "step": 116536}
{"lr": 0.0009556318355980933, "data_time": 0.0025609731674194336, "grad_norm": 0.1954491838812828, "loss": 0.4805641621351242, "time": 1.9391116857528687, "epoch": 373, "memory": 36824, "step": 116636}
{"lr": 0.000954595355346502, "data_time": 0.0029313325881958007, "grad_norm": 0.19641733467578887, "loss": 0.4809404522180557, "time": 1.7901141881942748, "epoch": 373, "memory": 36824, "step": 116736}
{"lr": 0.0009534238082612683, "data_time": 0.020410871505737303, "grad_norm": 0.25936868190765383, "loss": 0.48436650931835173, "time": 1.7743800163269043, "epoch": 374, "memory": 36824, "step": 116849}
{"lr": 0.0009523867558579971, "data_time": 0.0031500339508056642, "grad_norm": 0.22556446492671967, "loss": 0.4818781167268753, "time": 1.8254725694656373, "epoch": 374, "memory": 36824, "step": 116949}
{"lr": 0.0009513494376693721, "data_time": 0.0035793542861938476, "grad_norm": 0.22314475625753402, "loss": 0.47941150665283205, "time": 1.8206866264343262, "epoch": 374, "memory": 36824, "step": 117049}
{"lr": 0.0009501769505285393, "data_time": 0.00272221565246582, "grad_norm": 0.21620857566595078, "loss": 0.48247069120407104, "time": 1.7852322101593017, "epoch": 375, "memory": 36824, "step": 117162}
{"lr": 0.0009491390722486943, "data_time": 0.0027860164642333984, "grad_norm": 0.20609556436538695, "loss": 0.4815301388502121, "time": 2.1453284740448, "epoch": 375, "memory": 36824, "step": 117262}
{"lr": 0.0009481009338479414, "data_time": 0.003715229034423828, "grad_norm": 0.20672389715909958, "loss": 0.4766244858503342, "time": 1.8910260915756225, "epoch": 375, "memory": 36824, "step": 117362}
{"lr": 0.0009469275266860782, "data_time": 0.15752906799316407, "grad_norm": 0.2511543020606041, "loss": 0.4758277893066406, "time": 1.7602821111679077, "epoch": 376, "memory": 36824, "step": 117475}
{"lr": 0.0009458888402641336, "data_time": 0.002857542037963867, "grad_norm": 0.197084242105484, "loss": 0.4741519600152969, "time": 1.866085410118103, "epoch": 376, "memory": 36824, "step": 117575}
{"lr": 0.0009448498993901739, "data_time": 0.0029763221740722657, "grad_norm": 0.2508618235588074, "loss": 0.4833650946617126, "time": 1.8237364292144775, "epoch": 376, "memory": 36824, "step": 117675}
{"lr": 0.0009436755922575728, "data_time": 0.1274726629257202, "grad_norm": 0.2025998443365097, "loss": 0.4781349629163742, "time": 1.7762561559677124, "epoch": 377, "memory": 36824, "step": 117788}
{"lr": 0.0009426361154418142, "data_time": 0.002766323089599609, "grad_norm": 0.22139137834310532, "loss": 0.47546028196811674, "time": 2.222616195678711, "epoch": 377, "memory": 36824, "step": 117888}
{"lr": 0.0009415963898472804, "data_time": 0.003367972373962402, "grad_norm": 0.19372270852327347, "loss": 0.48278799653053284, "time": 1.8054617643356323, "epoch": 377, "memory": 36824, "step": 117988}
{"lr": 0.0009404212028096087, "data_time": 0.3714572906494141, "grad_norm": 0.1965080127120018, "loss": 0.47821650207042693, "time": 1.7977033376693725, "epoch": 378, "memory": 36824, "step": 118101}
{"lr": 0.0009393809533618281, "data_time": 0.057650971412658694, "grad_norm": 0.24737460315227508, "loss": 0.4793336123228073, "time": 1.8315686225891112, "epoch": 378, "memory": 36824, "step": 118201}
{"lr": 0.0009383404608127627, "data_time": 0.0035762786865234375, "grad_norm": 0.21710032671689988, "loss": 0.4789982050657272, "time": 1.7808197736740112, "epoch": 378, "memory": 36824, "step": 118301}
{"lr": 0.000937164413950726, "data_time": 0.2717307090759277, "grad_norm": 0.22687683999538422, "loss": 0.4757500857114792, "time": 1.8794227123260498, "epoch": 379, "memory": 36824, "step": 118414}
{"lr": 0.0009361234096459151, "data_time": 0.0028660058975219726, "grad_norm": 0.2446762055158615, "loss": 0.47731572687625884, "time": 2.128429341316223, "epoch": 379, "memory": 36824, "step": 118514}
{"lr": 0.0009350821679214639, "data_time": 0.002875876426696777, "grad_norm": 0.21217418760061263, "loss": 0.48392433822155, "time": 1.7025299787521362, "epoch": 379, "memory": 36824, "step": 118614}
{"lr": 0.000933905281330457, "data_time": 0.26048779487609863, "grad_norm": 0.19961887151002883, "loss": 0.48059176802635195, "time": 1.8371104001998901, "epoch": 380, "memory": 36824, "step": 118727}
{"lr": 0.0009328635399565066, "data_time": 0.00275881290435791, "grad_norm": 0.20084037631750107, "loss": 0.48040037751197817, "time": 1.8663586854934693, "epoch": 380, "memory": 36824, "step": 118827}
{"lr": 0.0009318215668486182, "data_time": 0.003062319755554199, "grad_norm": 0.2523766249418259, "loss": 0.47871534526348114, "time": 1.8518067121505737, "epoch": 380, "memory": 36824, "step": 118927}
{"lr": 0.0009306438606383867, "data_time": 0.2478435754776001, "grad_norm": 0.2086058184504509, "loss": 0.4796182721853256, "time": 1.8341127395629884, "epoch": 381, "memory": 36824, "step": 119040}
{"lr": 0.0009296013999957845, "data_time": 0.00249788761138916, "grad_norm": 0.2241950273513794, "loss": 0.4754644185304642, "time": 1.7500690460205077, "epoch": 381, "memory": 36824, "step": 119140}
{"lr": 0.000928558713308904, "data_time": 0.004002285003662109, "grad_norm": 0.20166335850954056, "loss": 0.47747263610363005, "time": 1.8056371450424193, "epoch": 381, "memory": 36824, "step": 119240}
{"lr": 0.0009273802076031972, "data_time": 0.14729769229888917, "grad_norm": 0.21362135112285613, "loss": 0.4779976814985275, "time": 1.8094907999038696, "epoch": 382, "memory": 36824, "step": 119353}
{"lr": 0.0009263370455047184, "data_time": 0.0025505781173706054, "grad_norm": 0.21155525296926497, "loss": 0.4751546174287796, "time": 1.8073300838470459, "epoch": 382, "memory": 36824, "step": 119453}
{"lr": 0.0009252936630554855, "data_time": 0.0035308122634887694, "grad_norm": 0.24442106187343599, "loss": 0.4792559713125229, "time": 1.7390109777450562, "epoch": 382, "memory": 36824, "step": 119553}
{"lr": 0.0009241143779917146, "data_time": 0.11446139812469483, "grad_norm": 0.20920203030109405, "loss": 0.47799198031425477, "time": 1.8225435256958007, "epoch": 383, "memory": 36824, "step": 119666}
{"lr": 0.0009230705322621201, "data_time": 0.0026167154312133787, "grad_norm": Infinity, "loss": 0.48324366807937624, "time": 1.841362977027893, "epoch": 383, "memory": 36824, "step": 119766}
{"lr": 0.0009220264718790621, "data_time": 0.003560924530029297, "grad_norm": 0.20891675502061843, "loss": 0.4715610921382904, "time": 1.739072322845459, "epoch": 383, "memory": 36824, "step": 119866}
{"lr": 0.0009208464276079544, "data_time": 0.18182060718536378, "grad_norm": 0.19571772664785386, "loss": 0.47806845009326937, "time": 1.8206657886505127, "epoch": 384, "memory": 36824, "step": 119979}
{"lr": 0.0009198019160836889, "data_time": 0.0024842262268066407, "grad_norm": 0.251231287419796, "loss": 0.47991605997085574, "time": 1.7874483108520507, "epoch": 384, "memory": 36824, "step": 120079}
{"lr": 0.0009187571956069165, "data_time": 0.0030713319778442384, "grad_norm": 0.20698654353618623, "loss": 0.48237050175666807, "time": 1.7931704998016358, "epoch": 384, "memory": 36824, "step": 120179}
{"lr": 0.0009175764122921731, "data_time": 0.0026406764984130858, "grad_norm": 0.2409170761704445, "loss": 0.48171519935131074, "time": 1.8500516891479493, "epoch": 385, "memory": 36824, "step": 120292}
{"lr": 0.0009165312528210572, "data_time": 0.0025961637496948243, "grad_norm": 0.2224734291434288, "loss": 0.48051508963108064, "time": 1.8108367443084716, "epoch": 385, "memory": 36824, "step": 120392}
{"lr": 0.0009154858901019603, "data_time": 0.0031797409057617186, "grad_norm": 0.20569613128900527, "loss": 0.48095524609088897, "time": 1.8050110340118408, "epoch": 385, "memory": 36824, "step": 120492}
{"lr": 0.0009143043879199103, "data_time": 0.036087656021118165, "grad_norm": 0.2232244700193405, "loss": 0.47876531183719634, "time": 1.8375168800354005, "epoch": 386, "memory": 36824, "step": 120605}
{"lr": 0.0009132585983608354, "data_time": 0.0023917198181152345, "grad_norm": 0.22107139676809312, "loss": 0.4887403309345245, "time": 1.7895992517471313, "epoch": 386, "memory": 36824, "step": 120705}
{"lr": 0.0009122126112617804, "data_time": 0.004051423072814942, "grad_norm": 0.20714318752288818, "loss": 0.48226293325424197, "time": 1.7380509853363038, "epoch": 386, "memory": 36824, "step": 120805}
{"lr": 0.000911030410401036, "data_time": 0.42787833213806153, "grad_norm": 0.20298438221216203, "loss": 0.4791885703802109, "time": 1.7958978176116944, "epoch": 387, "memory": 36824, "step": 120918}
{"lr": 0.0009099840086236583, "data_time": 0.0026921510696411135, "grad_norm": 0.19576168954372405, "loss": 0.4782903790473938, "time": 1.7552555322647094, "epoch": 387, "memory": 36824, "step": 121018}
{"lr": 0.0009089374150176774, "data_time": 0.003094124794006348, "grad_norm": 0.20790403038263322, "loss": 0.47627863585948943, "time": 1.7741087198257446, "epoch": 387, "memory": 36824, "step": 121118}
{"lr": 0.0009077545356787898, "data_time": 0.2200897216796875, "grad_norm": 0.23972384631633759, "loss": 0.4770080059766769, "time": 1.7869312286376953, "epoch": 388, "memory": 36824, "step": 121231}
{"lr": 0.0009067075395632308, "data_time": 0.002573990821838379, "grad_norm": 0.20719732493162155, "loss": 0.4810693025588989, "time": 1.8753771543502809, "epoch": 388, "memory": 36824, "step": 121331}
{"lr": 0.0009056603573337204, "data_time": 0.0035729408264160156, "grad_norm": 0.23122700899839402, "loss": 0.48215084075927733, "time": 1.7639434099197389, "epoch": 388, "memory": 36824, "step": 121431}
{"lr": 0.0009044768197288338, "data_time": 0.003086376190185547, "grad_norm": 0.24611250311136246, "loss": 0.478873610496521, "time": 1.9516626596450806, "epoch": 389, "memory": 36824, "step": 121544}
{"lr": 0.0009034292471653676, "data_time": 0.002953052520751953, "grad_norm": 0.21387720108032227, "loss": 0.47727493941783905, "time": 1.7906366348266602, "epoch": 389, "memory": 36824, "step": 121644}
{"lr": 0.0009023814942057837, "data_time": 0.004033660888671875, "grad_norm": 0.22975426763296128, "loss": 0.4800491899251938, "time": 2.2168274879455567, "epoch": 389, "memory": 36824, "step": 121744}
{"lr": 0.0009011973185582895, "data_time": 0.03521547317504883, "grad_norm": 0.2498285800218582, "loss": 0.48057926297187803, "time": 1.805303955078125, "epoch": 390, "memory": 36824, "step": 121857}
{"lr": 0.0009001491874470406, "data_time": 0.0025496721267700196, "grad_norm": 0.20692259222269058, "loss": 0.4803385317325592, "time": 1.845677089691162, "epoch": 390, "memory": 36824, "step": 121957}
{"lr": 0.0008991008816605905, "data_time": 0.0027242183685302736, "grad_norm": 0.2131015256047249, "loss": 0.47762114405632017, "time": 1.7752106189727783, "epoch": 390, "memory": 36824, "step": 122057}
{"lr": 0.0008979160882047844, "data_time": 0.5443514585494995, "grad_norm": 0.23957151621580125, "loss": 0.4787700116634369, "time": 1.8039873361587524, "epoch": 391, "memory": 36824, "step": 122170}
{"lr": 0.0008968674164554223, "data_time": 0.15302915573120118, "grad_norm": 0.19946565181016923, "loss": 0.4828452080488205, "time": 1.8866949319839477, "epoch": 391, "memory": 36824, "step": 122270}
{"lr": 0.000895818575754759, "data_time": 0.0028331756591796877, "grad_norm": 0.21674385368824006, "loss": 0.4773328244686127, "time": 1.858023476600647, "epoch": 391, "memory": 36824, "step": 122370}
{"lr": 0.0008946331847354912, "data_time": 0.26749677658081056, "grad_norm": 0.2185409516096115, "loss": 0.4768721848726273, "time": 1.8154306411743164, "epoch": 392, "memory": 36824, "step": 122483}
{"lr": 0.0008935839902669217, "data_time": 0.00278325080871582, "grad_norm": 0.244732129573822, "loss": 0.47409641444683076, "time": 1.8553360939025878, "epoch": 392, "memory": 36824, "step": 122583}
{"lr": 0.0008925346325738393, "data_time": 0.003211355209350586, "grad_norm": 0.20895641148090363, "loss": 0.4774423480033875, "time": 1.803497076034546, "epoch": 392, "memory": 36824, "step": 122683}
{"lr": 0.0008913486642461716, "data_time": 0.03258910179138184, "grad_norm": 0.2053962379693985, "loss": 0.47386960685253143, "time": 1.7985253572463988, "epoch": 393, "memory": 36824, "step": 122796}
{"lr": 0.0008902989649862329, "data_time": 0.0027068614959716796, "grad_norm": 0.20748091042041777, "loss": 0.4820818454027176, "time": 1.848731803894043, "epoch": 393, "memory": 36824, "step": 122896}
{"lr": 0.0008892491082313591, "data_time": 0.0030220985412597657, "grad_norm": 0.208020381629467, "loss": 0.4750855952501297, "time": 1.816575789451599, "epoch": 393, "memory": 36824, "step": 122996}
{"lr": 0.0008880625828602196, "data_time": 0.2981908082962036, "grad_norm": 0.2312813088297844, "loss": 0.471574530005455, "time": 1.7893991470336914, "epoch": 394, "memory": 36824, "step": 123109}
{"lr": 0.0008870123967453777, "data_time": 0.003192448616027832, "grad_norm": 0.21320626586675645, "loss": 0.48440939784049986, "time": 1.8324819803237915, "epoch": 394, "memory": 36824, "step": 123209}
{"lr": 0.0008859620588678685, "data_time": 0.0029822826385498048, "grad_norm": 0.22066383361816405, "loss": 0.477419576048851, "time": 1.764144778251648, "epoch": 394, "memory": 36824, "step": 123309}
{"lr": 0.0008847749967277009, "data_time": 0.2793224573135376, "grad_norm": 0.24662939608097076, "loss": 0.4737082600593567, "time": 1.7935011863708497, "epoch": 395, "memory": 36824, "step": 123422}
{"lr": 0.0008837243417027387, "data_time": 0.0026073694229125977, "grad_norm": 0.22496674358844757, "loss": 0.4808811992406845, "time": 1.8152610540390015, "epoch": 395, "memory": 36824, "step": 123522}
{"lr": 0.0008826735406499695, "data_time": 0.003046584129333496, "grad_norm": 0.2012936607003212, "loss": 0.4814982503652573, "time": 1.771873140335083, "epoch": 395, "memory": 36824, "step": 123622}
{"lr": 0.0008814859620243908, "data_time": 0.0026094913482666016, "grad_norm": 0.210521100461483, "loss": 0.4789292484521866, "time": 1.8165982723236085, "epoch": 396, "memory": 36824, "step": 123735}
{"lr": 0.0008804348560421032, "data_time": 0.0026561498641967775, "grad_norm": 0.21070728898048402, "loss": 0.47968476414680483, "time": 1.8968740463256837, "epoch": 396, "memory": 36824, "step": 123835}
{"lr": 0.0008793836097693635, "data_time": 0.0035361766815185545, "grad_norm": 0.22517005503177642, "loss": 0.47550801932811737, "time": 1.7389610767364503, "epoch": 396, "memory": 36824, "step": 123935}
{"lr": 0.0008781955349508153, "data_time": 0.11289305686950683, "grad_norm": 0.22288366854190828, "loss": 0.48441788256168367, "time": 1.8422695636749267, "epoch": 397, "memory": 36824, "step": 124048}
{"lr": 0.0008771439959717033, "data_time": 0.002730250358581543, "grad_norm": 0.2398986890912056, "loss": 0.4822471261024475, "time": 1.8636175394058228, "epoch": 397, "memory": 36824, "step": 124148}
{"lr": 0.000876092322441893, "data_time": 0.0034936189651489256, "grad_norm": 0.2117721378803253, "loss": 0.47369020581245425, "time": 1.8210468530654906, "epoch": 397, "memory": 36824, "step": 124248}
{"lr": 0.0008749037717312968, "data_time": 0.08316893577575683, "grad_norm": 0.22418634593486786, "loss": 0.47719382047653197, "time": 1.8257940053939818, "epoch": 398, "memory": 36824, "step": 124361}
{"lr": 0.00087385181772326, "data_time": 0.002758169174194336, "grad_norm": 0.23112519681453705, "loss": 0.4751058965921402, "time": 1.8654004096984864, "epoch": 398, "memory": 36824, "step": 124461}
{"lr": 0.0008727997349065781, "data_time": 0.003568005561828613, "grad_norm": 0.24232703149318696, "loss": 0.4780201703310013, "time": 1.8472697496414185, "epoch": 398, "memory": 36824, "step": 124561}
{"lr": 0.0008716107286129866, "data_time": 0.003138113021850586, "grad_norm": 0.2127714306116104, "loss": 0.4807512789964676, "time": 1.8015178203582765, "epoch": 399, "memory": 36824, "step": 124674}
{"lr": 0.0008705583775510168, "data_time": 0.0026682138442993162, "grad_norm": 0.20935071557760238, "loss": 0.4777831733226776, "time": 1.8152342796325684, "epoch": 399, "memory": 36824, "step": 124774}
{"lr": 0.0008695059034246558, "data_time": 0.0031115055084228516, "grad_norm": 0.2273298606276512, "loss": 0.47857103049755095, "time": 1.7941713094711305, "epoch": 399, "memory": 36824, "step": 124874}
{"lr": 0.0008683164618649053, "data_time": 0.23400423526763917, "grad_norm": 0.2079762727022171, "loss": 0.4802206039428711, "time": 1.8528979778289796, "epoch": 400, "memory": 36824, "step": 124987}
{"lr": 0.0008672637317307794, "data_time": 0.0023912429809570313, "grad_norm": 0.21598411500453948, "loss": 0.4746743828058243, "time": 1.808842921257019, "epoch": 400, "memory": 36824, "step": 125087}
{"lr": 0.0008662108842786181, "data_time": 0.0028388261795043944, "grad_norm": 0.21969362944364548, "loss": 0.47967808842659, "time": 1.9150490522384644, "epoch": 400, "memory": 36824, "step": 125187}
{"lr": 0.0008650210277769819, "data_time": 0.24286136627197266, "grad_norm": 0.21450575888156892, "loss": 0.47246179580688474, "time": 2.1859407663345336, "epoch": 401, "memory": 36824, "step": 125300}
{"lr": 0.0008639679365589522, "data_time": 0.0033422470092773437, "grad_norm": 0.2209816887974739, "loss": 0.48176192343235014, "time": 1.9521691799163818, "epoch": 401, "memory": 36824, "step": 125400}
{"lr": 0.0008629147337712492, "data_time": 0.0038481712341308593, "grad_norm": 0.21551358997821807, "loss": 0.47767841517925264, "time": 1.8730887651443482, "epoch": 401, "memory": 36824, "step": 125500}
{"lr": 0.0008617244826590933, "data_time": 0.18998472690582274, "grad_norm": 0.22136219292879106, "loss": 0.481632736325264, "time": 1.8799336671829223, "epoch": 402, "memory": 36824, "step": 125613}
{"lr": 0.0008606710483515847, "data_time": 0.0026159763336181642, "grad_norm": 0.20998927801847458, "loss": 0.47730142772197726, "time": 1.9014257192611694, "epoch": 402, "memory": 36824, "step": 125713}
{"lr": 0.0008596175082246692, "data_time": 0.002825808525085449, "grad_norm": 0.22099228501319884, "loss": 0.4796775281429291, "time": 1.8561466932296753, "epoch": 402, "memory": 36824, "step": 125813}
{"lr": 0.0008584268828401007, "data_time": 0.5749176025390625, "grad_norm": 0.23720054179430008, "loss": 0.4754694789648056, "time": 2.1300003290176392, "epoch": 403, "memory": 36824, "step": 125926}
{"lr": 0.0008573731234433971, "data_time": 0.002920985221862793, "grad_norm": 0.2139048159122467, "loss": 0.4791505753993988, "time": 1.7365992784500122, "epoch": 403, "memory": 36824, "step": 126026}
{"lr": 0.0008563192639793626, "data_time": 0.003066682815551758, "grad_norm": 0.21661538779735565, "loss": 0.47664385437965395, "time": 2.0132123470306396, "epoch": 403, "memory": 36824, "step": 126126}
{"lr": 0.0008551282846668851, "data_time": 0.17638802528381348, "grad_norm": 0.21759318858385085, "loss": 0.4835237324237823, "time": 1.873998260498047, "epoch": 404, "memory": 36824, "step": 126239}
{"lr": 0.0008540742181868278, "data_time": 0.0025681018829345702, "grad_norm": 0.23429151773452758, "loss": 0.4763458549976349, "time": 1.7792778730392456, "epoch": 404, "memory": 36824, "step": 126339}
{"lr": 0.0008530200573932252, "data_time": 0.0031726837158203127, "grad_norm": 0.21786738485097884, "loss": 0.4801092028617859, "time": 2.2740123748779295, "epoch": 404, "memory": 36824, "step": 126439}
{"lr": 0.0008518287445033872, "data_time": 0.23013813495635987, "grad_norm": 0.23818674832582473, "loss": 0.4776824235916138, "time": 1.8611753463745118, "epoch": 405, "memory": 36824, "step": 126552}
{"lr": 0.0008507743889510641, "data_time": 0.002640390396118164, "grad_norm": 0.21244267076253892, "loss": 0.47684748470783234, "time": 1.931886410713196, "epoch": 405, "memory": 36824, "step": 126652}
{"lr": 0.0008497199448405934, "data_time": 0.0030375242233276365, "grad_norm": 0.21488952338695527, "loss": 0.4767357349395752, "time": 1.8947808742523193, "epoch": 405, "memory": 36824, "step": 126752}
{"lr": 0.0008485283187296479, "data_time": 0.3533815622329712, "grad_norm": 0.2338698610663414, "loss": 0.4819976270198822, "time": 1.8589430809020997, "epoch": 406, "memory": 36824, "step": 126865}
{"lr": 0.000847473692121087, "data_time": 0.04381570816040039, "grad_norm": 0.2275511682033539, "loss": 0.4791973680257797, "time": 1.8993873357772828, "epoch": 406, "memory": 36824, "step": 126965}
{"lr": 0.0008464189827112883, "data_time": 0.003216862678527832, "grad_norm": 0.20977678894996643, "loss": 0.47937279641628266, "time": 2.026864171028137, "epoch": 406, "memory": 36824, "step": 127065}
{"lr": 0.0008452270637408356, "data_time": 0.31570436954498293, "grad_norm": 0.22840818017721176, "loss": 0.4799237698316574, "time": 1.8787014961242676, "epoch": 407, "memory": 36824, "step": 127178}
{"lr": 0.0008441721840966952, "data_time": 0.0026028633117675783, "grad_norm": 0.22906031757593154, "loss": 0.4776246339082718, "time": 1.83337345123291, "epoch": 407, "memory": 36824, "step": 127278}
{"lr": 0.0008431172274096418, "data_time": 0.0029468536376953125, "grad_norm": 0.27051655650138856, "loss": 0.47988441288471223, "time": 1.8106679677963258, "epoch": 407, "memory": 36824, "step": 127378}
{"lr": 0.0008419250359462893, "data_time": 0.09205911159515381, "grad_norm": 0.23403585702180862, "loss": 0.4758825898170471, "time": 1.877444291114807, "epoch": 408, "memory": 36824, "step": 127491}
{"lr": 0.0008408699212915522, "data_time": 0.0024181604385375977, "grad_norm": 0.26620970070362093, "loss": 0.47976144552230837, "time": 1.838224220275879, "epoch": 408, "memory": 36824, "step": 127591}
{"lr": 0.0008398147353535432, "data_time": 0.002919292449951172, "grad_norm": 0.2254301965236664, "loss": 0.48098852336406706, "time": 1.8561050653457642, "epoch": 408, "memory": 36824, "step": 127691}
{"lr": 0.0008386222917685538, "data_time": 0.045065999031066895, "grad_norm": 0.20881238132715224, "loss": 0.4794281005859375, "time": 1.8845239877700806, "epoch": 409, "memory": 36824, "step": 127804}
{"lr": 0.0008375669601322181, "data_time": 0.0029973268508911135, "grad_norm": 0.253585110604763, "loss": 0.47971965968608854, "time": 1.9117672443389893, "epoch": 409, "memory": 36824, "step": 127904}
{"lr": 0.0008365115629734701, "data_time": 0.003305864334106445, "grad_norm": 0.23396977335214614, "loss": 0.47744251787662506, "time": 1.8514698982238769, "epoch": 409, "memory": 36824, "step": 128004}
{"lr": 0.000835318887642414, "data_time": 0.17443833351135254, "grad_norm": 0.22678591161966324, "loss": 0.4840110421180725, "time": 1.8825471639633178, "epoch": 410, "memory": 36824, "step": 128117}
{"lr": 0.0008342633570571861, "data_time": 0.002619647979736328, "grad_norm": 0.23888854086399078, "loss": 0.4797113239765167, "time": 1.851240348815918, "epoch": 410, "memory": 36824, "step": 128217}
{"lr": 0.0008332077667115253, "data_time": 0.0030197381973266603, "grad_norm": 0.2216392233967781, "loss": 0.47992348968982695, "time": 2.107920598983765, "epoch": 410, "memory": 36824, "step": 128317}
{"lr": 0.0008320148800139323, "data_time": 0.003179764747619629, "grad_norm": 0.21907612979412078, "loss": 0.47338142096996305, "time": 1.8373804330825805, "epoch": 411, "memory": 36824, "step": 128430}
{"lr": 0.0008309591685159175, "data_time": 0.0024747371673583983, "grad_norm": 0.25282603353261945, "loss": 0.4751202881336212, "time": 1.850694751739502, "epoch": 411, "memory": 36824, "step": 128530}
{"lr": 0.0008299034030204701, "data_time": 0.0029752969741821287, "grad_norm": 0.2270902454853058, "loss": 0.47834177911281583, "time": 1.8473482847213745, "epoch": 411, "memory": 36824, "step": 128630}
{"lr": 0.000828710325339485, "data_time": 0.0026397466659545898, "grad_norm": 0.24806037098169326, "loss": 0.47889038026332853, "time": 1.8303627490997314, "epoch": 412, "memory": 36824, "step": 128743}
{"lr": 0.0008276544509678784, "data_time": 0.0028283357620239257, "grad_norm": 0.22907097190618514, "loss": 0.4754066526889801, "time": 1.9078738927841186, "epoch": 412, "memory": 36824, "step": 128843}
{"lr": 0.0008265985283627624, "data_time": 0.0035439014434814455, "grad_norm": 0.21248306035995485, "loss": 0.4830460250377655, "time": 1.8603789806365967, "epoch": 412, "memory": 36824, "step": 128943}
{"lr": 0.0008254052800847905, "data_time": 0.2773911714553833, "grad_norm": 0.2532137528061867, "loss": 0.47729685008525846, "time": 1.8021758794784546, "epoch": 413, "memory": 36824, "step": 129056}
{"lr": 0.0008243492608815744, "data_time": 0.002565431594848633, "grad_norm": 0.24625666439533234, "loss": 0.4759168416261673, "time": 1.881901216506958, "epoch": 413, "memory": 36824, "step": 129156}
{"lr": 0.0008232931992095951, "data_time": 0.0028466701507568358, "grad_norm": 0.22036525309085847, "loss": 0.48044078052043915, "time": 1.8632167816162108, "epoch": 413, "memory": 36824, "step": 129256}
{"lr": 0.0008220998007239555, "data_time": 0.23885390758514405, "grad_norm": 0.24009617120027543, "loss": 0.47816015779972076, "time": 1.8022626638412476, "epoch": 414, "memory": 36824, "step": 129369}
{"lr": 0.0008210436547335827, "data_time": 0.002540731430053711, "grad_norm": 0.23564732670783997, "loss": 0.47383093237876894, "time": 1.817442536354065, "epoch": 414, "memory": 36824, "step": 129469}
{"lr": 0.0008199874720399207, "data_time": 0.0033019304275512694, "grad_norm": 0.20402135998010634, "loss": 0.47850187718868253, "time": 1.8321004629135131, "epoch": 414, "memory": 36824, "step": 129569}
{"lr": 0.0008187939437385014, "data_time": 0.0025563478469848634, "grad_norm": 0.2269533932209015, "loss": 0.4781133532524109, "time": 1.8088059663772582, "epoch": 415, "memory": 36824, "step": 129682}
{"lr": 0.0008177376890075932, "data_time": 0.002556467056274414, "grad_norm": 0.22743298411369323, "loss": 0.4774677813053131, "time": 1.7378480672836303, "epoch": 415, "memory": 36824, "step": 129782}
{"lr": 0.0008166814033394951, "data_time": 0.0028182029724121093, "grad_norm": 0.22855043411254883, "loss": 0.47318809628486636, "time": 1.8342848300933838, "epoch": 415, "memory": 36824, "step": 129882}
{"lr": 0.0008154877656164024, "data_time": 0.11485209465026855, "grad_norm": 0.22630717903375625, "loss": 0.4743411898612976, "time": 1.8064898729324341, "epoch": 416, "memory": 36824, "step": 129995}
{"lr": 0.0008144314201934372, "data_time": 0.029825258255004882, "grad_norm": 0.24986685514450074, "loss": 0.47922136783599856, "time": 1.8220207214355468, "epoch": 416, "memory": 36824, "step": 130095}
{"lr": 0.0008133750495999117, "data_time": 0.003601694107055664, "grad_norm": 0.21650773882865906, "loss": 0.4726134270429611, "time": 1.806888222694397, "epoch": 416, "memory": 36824, "step": 130195}
{"lr": 0.0008121813228511208, "data_time": 0.11713192462921143, "grad_norm": 0.24158006608486177, "loss": 0.47637972235679626, "time": 1.7947293758392333, "epoch": 417, "memory": 36824, "step": 130308}
{"lr": 0.0008111249047861286, "data_time": 0.0028445959091186524, "grad_norm": 0.23331054747104646, "loss": 0.4770607233047485, "time": 1.8257728576660157, "epoch": 417, "memory": 36824, "step": 130408}
{"lr": 0.0008100684673176343, "data_time": 0.003986144065856933, "grad_norm": 0.21314919739961624, "loss": 0.4749320179224014, "time": 1.8689462184906005, "epoch": 417, "memory": 36824, "step": 130508}
{"lr": 0.0008088746719406414, "data_time": 0.14747462272644044, "grad_norm": 0.20266131758689881, "loss": 0.47997079193592074, "time": 1.8529289722442628, "epoch": 418, "memory": 36824, "step": 130621}
{"lr": 0.0008078181992848915, "data_time": 0.0026902675628662108, "grad_norm": 0.2312741160392761, "loss": 0.4784981578588486, "time": 1.888852882385254, "epoch": 418, "memory": 36824, "step": 130721}
{"lr": 0.0008067617129930295, "data_time": 0.0036916255950927733, "grad_norm": 0.2272215947508812, "loss": 0.47565110921859743, "time": 1.8336247205734253, "epoch": 418, "memory": 36824, "step": 130821}
{"lr": 0.0008055678693865045, "data_time": 0.080023193359375, "grad_norm": 0.2901524856686592, "loss": 0.4775607526302338, "time": 1.8525936126708984, "epoch": 419, "memory": 36824, "step": 130934}
{"lr": 0.0008045113601922, "data_time": 0.002611541748046875, "grad_norm": 0.22457283586263657, "loss": 0.4783609569072723, "time": 1.8689982891082764, "epoch": 419, "memory": 36824, "step": 131034}
{"lr": 0.0008034548431294078, "data_time": 0.003667736053466797, "grad_norm": 0.22115204334259034, "loss": 0.4829232424497604, "time": 1.8025116443634033, "epoch": 419, "memory": 36824, "step": 131134}
{"lr": 0.0008022609716928428, "data_time": 0.2772528171539307, "grad_norm": 0.22838271409273148, "loss": 0.47530355155467985, "time": 1.8486799478530884, "epoch": 420, "memory": 36824, "step": 131247}
{"lr": 0.0008012044440128113, "data_time": 0.0027660846710205076, "grad_norm": 0.3099089816212654, "loss": 0.4784427314996719, "time": 1.8463754892349242, "epoch": 420, "memory": 36824, "step": 131347}
{"lr": 0.0008001479142320485, "data_time": 0.0036422014236450195, "grad_norm": 0.23275035619735718, "loss": 0.47985902428627014, "time": 1.8400267124176026, "epoch": 420, "memory": 36824, "step": 131447}
{"lr": 0.0007989540353654128, "data_time": 0.027854347229003908, "grad_norm": 0.25849741846323016, "loss": 0.4785620480775833, "time": 1.8743897438049317, "epoch": 421, "memory": 36824, "step": 131560}
{"lr": 0.0007978975072527965, "data_time": 0.0029930830001831054, "grad_norm": 0.2630802929401398, "loss": 0.4779472678899765, "time": 1.889232087135315, "epoch": 421, "memory": 36824, "step": 131660}
{"lr": 0.0007968409828072424, "data_time": 0.0029808759689331056, "grad_norm": 0.2249189481139183, "loss": 0.4786208957433701, "time": 1.8686221361160278, "epoch": 421, "memory": 36824, "step": 131760}
{"lr": 0.0007956471169106329, "data_time": 0.1963284969329834, "grad_norm": 0.2197969764471054, "loss": 0.4731960028409958, "time": 1.837800717353821, "epoch": 422, "memory": 36824, "step": 131873}
{"lr": 0.0007945906064185819, "data_time": 0.00327448844909668, "grad_norm": 0.23335247337818146, "loss": 0.4800167948007584, "time": 2.208078980445862, "epoch": 422, "memory": 36824, "step": 131973}
{"lr": 0.0007935341053613246, "data_time": 0.003925824165344238, "grad_norm": 0.2461666852235794, "loss": 0.48231402337551116, "time": 1.8194245100021362, "epoch": 422, "memory": 36824, "step": 132073}
{"lr": 0.0007923402728346143, "data_time": 0.44463510513305665, "grad_norm": 0.24103166908025742, "loss": 0.47609278559684753, "time": 1.82249174118042, "epoch": 423, "memory": 36824, "step": 132186}
{"lr": 0.0007912837980159788, "data_time": 0.002487969398498535, "grad_norm": 0.23908236473798752, "loss": 0.47834576666355133, "time": 1.8184168338775635, "epoch": 423, "memory": 36824, "step": 132286}
{"lr": 0.0007902273383997057, "data_time": 0.0034127235412597656, "grad_norm": 0.21246519088745117, "loss": 0.4743619322776794, "time": 1.790303111076355, "epoch": 423, "memory": 36824, "step": 132386}
{"lr": 0.0007890335596421995, "data_time": 0.15289103984832764, "grad_norm": 0.25141356885433197, "loss": 0.4766746312379837, "time": 1.825708818435669, "epoch": 424, "memory": 36824, "step": 132499}
{"lr": 0.0007879771385492185, "data_time": 0.002882218360900879, "grad_norm": 0.24110794812440872, "loss": 0.47269250750541686, "time": 2.1392335414886476, "epoch": 424, "memory": 36824, "step": 132599}
{"lr": 0.0007869207384259109, "data_time": 0.0033977508544921877, "grad_norm": 0.23130353540182114, "loss": 0.47668353021144866, "time": 1.7829926013946533, "epoch": 424, "memory": 36824, "step": 132699}
{"lr": 0.0007857270338359928, "data_time": 0.002743244171142578, "grad_norm": 0.23383176177740098, "loss": 0.47731329798698424, "time": 1.81824631690979, "epoch": 425, "memory": 36824, "step": 132812}
{"lr": 0.000784670684519989, "data_time": 0.002718067169189453, "grad_norm": 0.22617775052785874, "loss": 0.48285445272922517, "time": 1.9086605310440063, "epoch": 425, "memory": 36824, "step": 132912}
{"lr": 0.0007836143619406096, "data_time": 0.003281831741333008, "grad_norm": 0.2516286700963974, "loss": 0.4762885183095932, "time": 1.850954270362854, "epoch": 425, "memory": 36824, "step": 133012}
{"lr": 0.0007824207519153976, "data_time": 0.24173197746276856, "grad_norm": 0.21967936158180237, "loss": 0.4789474934339523, "time": 1.8286454200744628, "epoch": 426, "memory": 36824, "step": 133125}
{"lr": 0.0007813644924264647, "data_time": 0.07158923149108887, "grad_norm": 0.250529570877552, "loss": 0.4752364635467529, "time": 2.053044390678406, "epoch": 426, "memory": 36824, "step": 133225}
{"lr": 0.0007803082654406514, "data_time": 0.003033876419067383, "grad_norm": 0.2320303350687027, "loss": 0.4711159199476242, "time": 1.8204488277435302, "epoch": 426, "memory": 36824, "step": 133325}
{"lr": 0.0007791147703756488, "data_time": 0.212565279006958, "grad_norm": 0.23859339505434035, "loss": 0.4768978595733643, "time": 1.7779695749282838, "epoch": 427, "memory": 36824, "step": 133438}
{"lr": 0.0007780586187623486, "data_time": 0.0028793811798095703, "grad_norm": 0.2138754278421402, "loss": 0.480308935046196, "time": 1.8390766620635985, "epoch": 427, "memory": 36824, "step": 133538}
{"lr": 0.0007770025054181077, "data_time": 0.003563690185546875, "grad_norm": 0.2393066793680191, "loss": 0.47693207263946535, "time": 1.76722412109375, "epoch": 427, "memory": 36824, "step": 133638}
{"lr": 0.0007758091457068513, "data_time": 0.19410929679870606, "grad_norm": 0.22545204758644105, "loss": 0.47820585668087007, "time": 1.8379581928253175, "epoch": 428, "memory": 36824, "step": 133751}
{"lr": 0.0007747531200159001, "data_time": 0.0025107145309448244, "grad_norm": 0.24183189272880554, "loss": 0.4781730681657791, "time": 1.8065636873245239, "epoch": 428, "memory": 36824, "step": 133851}
{"lr": 0.0007736971383592934, "data_time": 0.003983306884765625, "grad_norm": 0.24202762246131898, "loss": 0.4761603057384491, "time": 1.8364964962005614, "epoch": 428, "memory": 36824, "step": 133951}
{"lr": 0.0007725039343930078, "data_time": 0.0026361703872680663, "grad_norm": 0.231760174036026, "loss": 0.4783407419919968, "time": 1.8905081033706665, "epoch": 429, "memory": 36824, "step": 134064}
{"lr": 0.0007714480526689702, "data_time": 0.0028452634811401366, "grad_norm": 0.23985124826431276, "loss": 0.47597205340862275, "time": 1.8743184804916382, "epoch": 429, "memory": 36824, "step": 134164}
{"lr": 0.0007703922207438114, "data_time": 0.0036059141159057615, "grad_norm": 0.2571971818804741, "loss": 0.48303458988666537, "time": 1.7571135759353638, "epoch": 429, "memory": 36824, "step": 134264}
{"lr": 0.0007691991929110617, "data_time": 0.09891135692596435, "grad_norm": 0.240824419260025, "loss": 0.48186964690685274, "time": 1.8336669683456421, "epoch": 430, "memory": 36824, "step": 134377}
{"lr": 0.0007681434731960447, "data_time": 0.0025849103927612304, "grad_norm": 0.22167216688394548, "loss": 0.4783097803592682, "time": 1.8478557348251343, "epoch": 430, "memory": 36824, "step": 134477}
{"lr": 0.0007670878090435884, "data_time": 0.0030151844024658204, "grad_norm": 0.25801839530467985, "loss": 0.477810001373291, "time": 1.8481862545013428, "epoch": 430, "memory": 36824, "step": 134577}
{"lr": 0.0007658949777299262, "data_time": 0.05968773365020752, "grad_norm": 0.24634318351745604, "loss": 0.4804950714111328, "time": 1.9165079832077025, "epoch": 431, "memory": 36824, "step": 134690}
{"lr": 0.0007648394380632671, "data_time": 0.0026031494140625, "grad_norm": 0.24562852531671525, "loss": 0.477989649772644, "time": 1.8969316959381104, "epoch": 431, "memory": 36824, "step": 134790}
{"lr": 0.0007637839597219003, "data_time": 0.003123784065246582, "grad_norm": 0.23647209107875825, "loss": 0.47331652939319613, "time": 1.8581985473632812, "epoch": 431, "memory": 36824, "step": 134890}
{"lr": 0.000762591345309524, "data_time": 0.29504363536834716, "grad_norm": 0.2304363504052162, "loss": 0.47355752885341645, "time": 1.850936722755432, "epoch": 432, "memory": 36824, "step": 135003}
{"lr": 0.0007615360037274813, "data_time": 0.04850931167602539, "grad_norm": 0.2220526158809662, "loss": 0.47739604115486145, "time": 1.9736769437789916, "epoch": 432, "memory": 36824, "step": 135103}
{"lr": 0.0007604807292324164, "data_time": 0.003503274917602539, "grad_norm": 0.24592270106077194, "loss": 0.4844606935977936, "time": 1.856502366065979, "epoch": 432, "memory": 36824, "step": 135203}
{"lr": 0.0007592883520998139, "data_time": 0.23070287704467773, "grad_norm": 0.2342522457242012, "loss": 0.4784208983182907, "time": 1.8449200630187987, "epoch": 433, "memory": 36824, "step": 135316}
{"lr": 0.0007582332266352642, "data_time": 0.002550697326660156, "grad_norm": 0.23816951215267182, "loss": 0.47382366955280303, "time": 1.819218921661377, "epoch": 433, "memory": 36824, "step": 135416}
{"lr": 0.0007571781740182316, "data_time": 0.002870607376098633, "grad_norm": 0.24276857525110246, "loss": 0.4808425962924957, "time": 1.7592004537582397, "epoch": 433, "memory": 36824, "step": 135516}
{"lr": 0.0007559860545398393, "data_time": 0.24831552505493165, "grad_norm": 0.23683040738105773, "loss": 0.47501679956912995, "time": 2.091410517692566, "epoch": 434, "memory": 36824, "step": 135629}
{"lr": 0.0007549311632219678, "data_time": 0.0033753156661987306, "grad_norm": 0.25594541281461713, "loss": 0.4740790516138077, "time": 1.8857358932495116, "epoch": 434, "memory": 36824, "step": 135729}
{"lr": 0.0007538763505109058, "data_time": 0.0035296201705932615, "grad_norm": 0.22716879844665527, "loss": 0.47935362458229064, "time": 1.745747709274292, "epoch": 434, "memory": 36824, "step": 135829}
{"lr": 0.0007526845090567553, "data_time": 0.1368032932281494, "grad_norm": 0.23940365314483641, "loss": 0.47530982494354246, "time": 1.7762484312057496, "epoch": 435, "memory": 36824, "step": 135942}
{"lr": 0.0007516298699107414, "data_time": 0.0024877309799194334, "grad_norm": 0.24265017211437226, "loss": 0.4777947127819061, "time": 1.8985169887542725, "epoch": 435, "memory": 36824, "step": 136042}
{"lr": 0.000750575315129488, "data_time": 0.0038537979125976562, "grad_norm": 0.26333241909742355, "loss": 0.47635009288787844, "time": 1.9673072576522828, "epoch": 435, "memory": 36824, "step": 136142}
{"lr": 0.0007493837720648627, "data_time": 0.2769604206085205, "grad_norm": 0.26564662009477613, "loss": 0.47678788006305695, "time": 1.9602611541748047, "epoch": 436, "memory": 36824, "step": 136255}
{"lr": 0.0007483294031115812, "data_time": 0.0026941776275634767, "grad_norm": 0.24594366699457168, "loss": 0.4741718888282776, "time": 1.921656608581543, "epoch": 436, "memory": 36824, "step": 136355}
{"lr": 0.0007472751242795691, "data_time": 0.002932167053222656, "grad_norm": 0.2574354350566864, "loss": 0.4774865657091141, "time": 1.8048876762390136, "epoch": 436, "memory": 36824, "step": 136455}
{"lr": 0.0007460838999646521, "data_time": 0.0027107477188110353, "grad_norm": 0.25324773192405703, "loss": 0.48123112618923186, "time": 1.7628360986709595, "epoch": 437, "memory": 36824, "step": 136568}
{"lr": 0.0007450298192203582, "data_time": 0.00269777774810791, "grad_norm": 0.2481396734714508, "loss": 0.4771985948085785, "time": 1.7551825284957885, "epoch": 437, "memory": 36824, "step": 136668}
{"lr": 0.0007439758343523034, "data_time": 0.003647303581237793, "grad_norm": 0.21870044767856597, "loss": 0.4776772171258926, "time": 1.7411569118499757, "epoch": 437, "memory": 36824, "step": 136768}
{"lr": 0.0007427849491418316, "data_time": 0.39137632846832277, "grad_norm": 0.25748166590929034, "loss": 0.4771221160888672, "time": 2.0690407514572144, "epoch": 438, "memory": 36824, "step": 136881}
{"lr": 0.0007417311746178573, "data_time": 0.0025270700454711912, "grad_norm": 0.25457282811403276, "loss": 0.4783619433641434, "time": 1.7973019361495972, "epoch": 438, "memory": 36824, "step": 136981}
{"lr": 0.0007406775017234527, "data_time": 0.0029203176498413088, "grad_norm": 0.23338142037391663, "loss": 0.4737714320421219, "time": 1.7218928575515746, "epoch": 438, "memory": 36824, "step": 137081}
{"lr": 0.000739486975966368, "data_time": 0.05621640682220459, "grad_norm": 0.22649941593408585, "loss": 0.47177686989307405, "time": 1.8158883333206177, "epoch": 439, "memory": 36824, "step": 137194}
{"lr": 0.0007384335256688123, "data_time": 0.002735447883605957, "grad_norm": 0.22597016990184784, "loss": 0.47811690866947176, "time": 1.8231703042984009, "epoch": 439, "memory": 36824, "step": 137294}
{"lr": 0.0007373801827524208, "data_time": 0.0028579235076904297, "grad_norm": 0.21633834540843963, "loss": 0.48061280846595766, "time": 1.898893404006958, "epoch": 439, "memory": 36824, "step": 137394}
{"lr": 0.0007361900367915239, "data_time": 0.0028075456619262697, "grad_norm": 0.24930688589811326, "loss": 0.48149820864200593, "time": 1.8346873998641968, "epoch": 440, "memory": 36824, "step": 137507}
{"lr": 0.0007351369287209461, "data_time": 0.0026236534118652343, "grad_norm": 0.24416136145591735, "loss": 0.47527332305908204, "time": 1.867304253578186, "epoch": 440, "memory": 36824, "step": 137607}
{"lr": 0.0007340839337812915, "data_time": 0.0034949779510498047, "grad_norm": 0.24404537826776504, "loss": 0.47895618379116056, "time": 1.8365887641906737, "epoch": 440, "memory": 36824, "step": 137707}
{"lr": 0.0007328941879528936, "data_time": 0.4380256414413452, "grad_norm": 0.23718956857919693, "loss": 0.47777343094348906, "time": 1.8207547903060912, "epoch": 441, "memory": 36824, "step": 137820}
{"lr": 0.0007318414401040047, "data_time": 0.002586174011230469, "grad_norm": 0.2373011067509651, "loss": 0.4792849689722061, "time": 1.7809877395629883, "epoch": 441, "memory": 36824, "step": 137920}
{"lr": 0.0007307888111338654, "data_time": 0.003522014617919922, "grad_norm": 0.2505990475416183, "loss": 0.4805127650499344, "time": 1.8327145338058473, "epoch": 441, "memory": 36824, "step": 138020}
{"lr": 0.000729599485767439, "data_time": 0.00262911319732666, "grad_norm": 0.23367164731025697, "loss": 0.4827967405319214, "time": 1.8601922273635865, "epoch": 442, "memory": 36824, "step": 138133}
{"lr": 0.0007285471161287959, "data_time": 0.002762246131896973, "grad_norm": 0.23245954960584642, "loss": 0.47557036578655243, "time": 1.8827197074890136, "epoch": 442, "memory": 36824, "step": 138233}
{"lr": 0.000727494871114696, "data_time": 0.002932238578796387, "grad_norm": 0.2330123484134674, "loss": 0.47437508702278136, "time": 1.8869867086410523, "epoch": 442, "memory": 36824, "step": 138333}
{"lr": 0.0007263059865325309, "data_time": 0.05037074089050293, "grad_norm": 0.2596220448613167, "loss": 0.48015256226062775, "time": 1.791490411758423, "epoch": 443, "memory": 36824, "step": 138446}
{"lr": 0.0007252540130862259, "data_time": 0.0024975061416625975, "grad_norm": 0.2914534166455269, "loss": 0.476948493719101, "time": 1.8134910106658935, "epoch": 443, "memory": 36824, "step": 138546}
{"lr": 0.0007242021700081311, "data_time": 0.002823805809020996, "grad_norm": 0.2528399631381035, "loss": 0.47804678976535797, "time": 1.7991004705429077, "epoch": 443, "memory": 36824, "step": 138646}
{"lr": 0.0007230137465249847, "data_time": 0.3604735374450684, "grad_norm": 0.2548427566885948, "loss": 0.48109065890312197, "time": 1.8720584630966186, "epoch": 444, "memory": 36824, "step": 138759}
{"lr": 0.0007219621872463417, "data_time": 0.0026879310607910156, "grad_norm": 0.2451495572924614, "loss": 0.47918170094490053, "time": 1.8455673694610595, "epoch": 444, "memory": 36824, "step": 138859}
{"lr": 0.0007209107640773478, "data_time": 0.004084491729736328, "grad_norm": 0.2406461238861084, "loss": 0.4737572282552719, "time": 1.7701806783676148, "epoch": 444, "memory": 36824, "step": 138959}
{"lr": 0.0007197228220000984, "data_time": 0.32741727828979494, "grad_norm": 0.2741021513938904, "loss": 0.47735588252544403, "time": 1.8864901781082153, "epoch": 445, "memory": 36824, "step": 139072}
{"lr": 0.0007186716948573653, "data_time": 0.0026547908782958984, "grad_norm": 0.265852227807045, "loss": 0.47632436752319335, "time": 1.8681604385375976, "epoch": 445, "memory": 36824, "step": 139172}
{"lr": 0.0007176207095633925, "data_time": 0.003552889823913574, "grad_norm": 0.2399375781416893, "loss": 0.4771361231803894, "time": 1.8309251546859742, "epoch": 445, "memory": 36824, "step": 139272}
{"lr": 0.0007164332691906925, "data_time": 0.45178699493408203, "grad_norm": 0.2577701672911644, "loss": 0.4766554534435272, "time": 1.905554461479187, "epoch": 446, "memory": 36824, "step": 139385}
{"lr": 0.0007153825921447304, "data_time": 0.0032180309295654296, "grad_norm": 0.2690615579485893, "loss": 0.4745260179042816, "time": 1.8136770248413085, "epoch": 446, "memory": 36824, "step": 139485}
{"lr": 0.0007143320626842174, "data_time": 0.0030443429946899413, "grad_norm": 0.2367814302444458, "loss": 0.47636243999004363, "time": 1.8135367631912231, "epoch": 446, "memory": 36824, "step": 139585}
{"lr": 0.000713145144306146, "data_time": 0.20688855648040771, "grad_norm": 0.24013755172491075, "loss": 0.4793175935745239, "time": 1.867397952079773, "epoch": 447, "memory": 36824, "step": 139698}
{"lr": 0.0007120949353101279, "data_time": 0.0026952743530273436, "grad_norm": 0.26921994984149933, "loss": 0.47736958265304563, "time": 1.8406318187713624, "epoch": 447, "memory": 36824, "step": 139798}
{"lr": 0.0007110448796337251, "data_time": 0.0039558887481689455, "grad_norm": 0.2315066710114479, "loss": 0.47857125103473663, "time": 1.8734010457992554, "epoch": 447, "memory": 36824, "step": 139898}
{"lr": 0.0007098585035314431, "data_time": 0.17079060077667235, "grad_norm": 0.2348582550883293, "loss": 0.4761296033859253, "time": 1.8620499610900878, "epoch": 448, "memory": 36824, "step": 140011}
{"lr": 0.0007088087805305432, "data_time": 0.002673029899597168, "grad_norm": 0.25914826691150666, "loss": 0.47741848826408384, "time": 1.735466432571411, "epoch": 448, "memory": 36824, "step": 140111}
{"lr": 0.0007077592165808053, "data_time": 0.0037309885025024413, "grad_norm": 0.22497560530900956, "loss": 0.4720224678516388, "time": 1.8203281164169312, "epoch": 448, "memory": 36824, "step": 140211}
{"lr": 0.0007065734030262068, "data_time": 0.0027297258377075194, "grad_norm": 0.2347070872783661, "loss": 0.47393370866775514, "time": 1.8075021505355835, "epoch": 449, "memory": 36824, "step": 140324}
{"lr": 0.0007055241839572941, "data_time": 0.002653765678405762, "grad_norm": 0.24601014852523803, "loss": 0.474149614572525, "time": 1.7413951873779296, "epoch": 449, "memory": 36824, "step": 140424}
{"lr": 0.0007044751296683719, "data_time": 0.002901792526245117, "grad_norm": 0.24035085141658782, "loss": 0.4804053336381912, "time": 1.76664457321167, "epoch": 449, "memory": 36824, "step": 140524}
{"lr": 0.0007032898989237386, "data_time": 0.1695662260055542, "grad_norm": 0.25258546471595766, "loss": 0.47788395583629606, "time": 1.815054702758789, "epoch": 450, "memory": 36824, "step": 140637}
{"lr": 0.0007022412017150732, "data_time": 0.002831268310546875, "grad_norm": 0.22403514534235, "loss": 0.4707797259092331, "time": 1.8658908128738403, "epoch": 450, "memory": 36824, "step": 140737}
{"lr": 0.000701192675012412, "data_time": 0.0031325578689575194, "grad_norm": 0.23760129064321517, "loss": 0.47868562638759615, "time": 1.7900229454040528, "epoch": 450, "memory": 36824, "step": 140837}
{"lr": 0.0007000080473300676, "data_time": 0.02697575092315674, "grad_norm": 0.22917137295007706, "loss": 0.4794458270072937, "time": 1.7975216150283813, "epoch": 451, "memory": 36824, "step": 140950}
{"lr": 0.00069895988990099, "data_time": 0.003668475151062012, "grad_norm": 0.26497272998094556, "loss": 0.4790669232606888, "time": 1.794454312324524, "epoch": 451, "memory": 36824, "step": 141050}
{"lr": 0.0006979119087010158, "data_time": 0.003015279769897461, "grad_norm": 0.23536849915981292, "loss": 0.47828682959079744, "time": 1.7882856130599976, "epoch": 451, "memory": 36824, "step": 141150}
{"lr": 0.0006967279043229791, "data_time": 0.00318603515625, "grad_norm": 0.24897824227809906, "loss": 0.4759447515010834, "time": 1.7903350114822387, "epoch": 452, "memory": 36824, "step": 141263}
{"lr": 0.0006956803045836081, "data_time": 0.0025319337844848635, "grad_norm": 0.26524673998355863, "loss": 0.47587947845458983, "time": 1.879676389694214, "epoch": 452, "memory": 36824, "step": 141363}
{"lr": 0.0006946328867934302, "data_time": 0.0029480457305908203, "grad_norm": 0.2510866656899452, "loss": 0.47452124655246736, "time": 1.8188135623931885, "epoch": 452, "memory": 36824, "step": 141463}
{"lr": 0.0006934495259510683, "data_time": 0.456022310256958, "grad_norm": Infinity, "loss": 0.4743333488702774, "time": 1.8596108198165893, "epoch": 453, "memory": 36824, "step": 141576}
{"lr": 0.000692402501801993, "data_time": 0.0027266740798950195, "grad_norm": 0.2668886870145798, "loss": 0.4774475902318954, "time": 1.889987850189209, "epoch": 453, "memory": 36824, "step": 141676}
{"lr": 0.0006913556653190907, "data_time": 0.0033922910690307615, "grad_norm": 0.27026508152484896, "loss": 0.4761985450983047, "time": 1.8340195178985597, "epoch": 453, "memory": 36824, "step": 141776}
{"lr": 0.000690172968232776, "data_time": 0.23289000988006592, "grad_norm": 0.2395518586039543, "loss": 0.4778140723705292, "time": 1.8082622289657593, "epoch": 454, "memory": 36824, "step": 141889}
{"lr": 0.0006891265375647508, "data_time": 0.0028269290924072266, "grad_norm": 0.24664167314767838, "loss": 0.47719756662845614, "time": 1.8432327032089233, "epoch": 454, "memory": 36824, "step": 141989}
{"lr": 0.0006880803002766718, "data_time": 0.00397942066192627, "grad_norm": 0.24069984406232833, "loss": 0.47182725965976713, "time": 1.8146483898162842, "epoch": 454, "memory": 36824, "step": 142089}
{"lr": 0.0006868982871554342, "data_time": 0.15200135707855225, "grad_norm": 0.24015212208032607, "loss": 0.48153790831565857, "time": 1.8377280712127686, "epoch": 455, "memory": 36824, "step": 142202}
{"lr": 0.0006858524678490718, "data_time": 0.0030797719955444336, "grad_norm": 0.2431027591228485, "loss": 0.47228931784629824, "time": 1.8042462110519408, "epoch": 455, "memory": 36824, "step": 142302}
{"lr": 0.0006848068476331232, "data_time": 0.0038524866104125977, "grad_norm": 0.2477326735854149, "loss": 0.4769042402505875, "time": 2.176149106025696, "epoch": 455, "memory": 36824, "step": 142402}
{"lr": 0.0006836255386743058, "data_time": 0.1714927911758423, "grad_norm": 0.2186826601624489, "loss": 0.4785224884748459, "time": 1.845532274246216, "epoch": 456, "memory": 36824, "step": 142515}
{"lr": 0.0006825803485997728, "data_time": 0.0025420427322387696, "grad_norm": 0.27265157699584963, "loss": 0.47448545396327974, "time": 1.9164122819900513, "epoch": 456, "memory": 36824, "step": 142615}
{"lr": 0.0006815353633227207, "data_time": 0.0034914493560791017, "grad_norm": 0.26998805701732637, "loss": 0.477133110165596, "time": 1.895394778251648, "epoch": 456, "memory": 36824, "step": 142715}
{"lr": 0.0006803547787116339, "data_time": 0.14651498794555665, "grad_norm": 0.261428539454937, "loss": 0.4766128361225128, "time": 1.8375790596008301, "epoch": 457, "memory": 36824, "step": 142828}
{"lr": 0.000679310235728345, "data_time": 0.0025369882583618163, "grad_norm": 0.2508964270353317, "loss": 0.47655707895755767, "time": 1.8957202434539795, "epoch": 457, "memory": 36824, "step": 142928}
{"lr": 0.0006782659032461033, "data_time": 0.0030747413635253905, "grad_norm": 0.2512929767370224, "loss": 0.4760806620121002, "time": 2.12187397480011, "epoch": 457, "memory": 36824, "step": 143028}
{"lr": 0.0006770860631556807, "data_time": 0.0027093410491943358, "grad_norm": 0.2498065322637558, "loss": 0.47561059296131136, "time": 1.8717149496078491, "epoch": 458, "memory": 36824, "step": 143141}
{"lr": 0.0006760421851119929, "data_time": 0.002824091911315918, "grad_norm": 0.2538188800215721, "loss": 0.48255903720855714, "time": 1.8007834434509278, "epoch": 458, "memory": 36824, "step": 143241}
{"lr": 0.0006749985232693227, "data_time": 0.0034526824951171876, "grad_norm": 0.2576627627015114, "loss": 0.47585251927375793, "time": 1.780754280090332, "epoch": 458, "memory": 36824, "step": 143341}
{"lr": 0.0006738194478597769, "data_time": 0.003023862838745117, "grad_norm": 0.2416338086128235, "loss": 0.47433067560195924, "time": 1.8494186639785766, "epoch": 459, "memory": 36824, "step": 143454}
{"lr": 0.0006727762525926864, "data_time": 0.0029056787490844725, "grad_norm": 0.26415197402238844, "loss": 0.4774858564138412, "time": 1.8680569887161256, "epoch": 459, "memory": 36824, "step": 143554}
{"lr": 0.0006717332792228898, "data_time": 0.0027884483337402345, "grad_norm": 0.2278075784444809, "loss": 0.4794193387031555, "time": 2.098193573951721, "epoch": 459, "memory": 36824, "step": 143654}
{"lr": 0.0006705549886413658, "data_time": 0.5081486463546753, "grad_norm": 0.24317553788423538, "loss": 0.4758243292570114, "time": 1.7719066619873047, "epoch": 460, "memory": 36824, "step": 143767}
{"lr": 0.0006695124939762011, "data_time": 0.24194493293762206, "grad_norm": 0.2601670727133751, "loss": 0.4749580919742584, "time": 1.8499986410140992, "epoch": 460, "memory": 36824, "step": 143867}
{"lr": 0.0006684702269008149, "data_time": 0.0037209510803222655, "grad_norm": 0.2663235604763031, "loss": 0.48206789791584015, "time": 1.7893312454223633, "epoch": 460, "memory": 36824, "step": 143967}
{"lr": 0.000667292741281048, "data_time": 0.43232338428497313, "grad_norm": 0.245964515209198, "loss": 0.4769384294748306, "time": 1.8326948642730714, "epoch": 461, "memory": 36824, "step": 144080}
{"lr": 0.0006662509650311668, "data_time": 0.002623891830444336, "grad_norm": 0.26375871896743774, "loss": 0.47590611577034, "time": 1.815310549736023, "epoch": 461, "memory": 36824, "step": 144180}
{"lr": 0.0006652094220596595, "data_time": 0.0038559436798095703, "grad_norm": 0.2516747683286667, "loss": 0.4788630664348602, "time": 1.8031209707260132, "epoch": 461, "memory": 36824, "step": 144280}
{"lr": 0.0006640327615216304, "data_time": 0.24143054485321044, "grad_norm": 0.2344828203320503, "loss": 0.47709611654281614, "time": 1.9040923833847045, "epoch": 462, "memory": 36824, "step": 144393}
{"lr": 0.0006629917214881138, "data_time": 0.0025490760803222657, "grad_norm": 0.25353487133979796, "loss": 0.478237709403038, "time": 1.8418145179748535, "epoch": 462, "memory": 36824, "step": 144493}
{"lr": 0.0006619509204175811, "data_time": 0.0028799772262573242, "grad_norm": 0.28728632628917694, "loss": 0.4787161022424698, "time": 1.8480570793151856, "epoch": 462, "memory": 36824, "step": 144593}
{"lr": 0.0006607751050671712, "data_time": 0.33332414627075196, "grad_norm": 0.2908312126994133, "loss": 0.47791244089603424, "time": 1.7987972021102905, "epoch": 463, "memory": 36824, "step": 144706}
{"lr": 0.0006597348190385218, "data_time": 0.0025177955627441405, "grad_norm": 0.25391400754451754, "loss": 0.47672902047634125, "time": 1.76858389377594, "epoch": 463, "memory": 36824, "step": 144806}
{"lr": 0.0006586947776533823, "data_time": 0.004149055480957032, "grad_norm": 0.27446313202381134, "loss": 0.4803247839212418, "time": 1.8302064657211303, "epoch": 463, "memory": 36824, "step": 144906}
{"lr": 0.0006575198275820326, "data_time": 0.17285797595977784, "grad_norm": 0.2544463098049164, "loss": 0.47869937419891356, "time": 1.8061692714691162, "epoch": 464, "memory": 36824, "step": 145019}
{"lr": 0.0006564803133338686, "data_time": 0.0027104854583740235, "grad_norm": 0.255038295686245, "loss": 0.47933563590049744, "time": 1.8779173851013184, "epoch": 464, "memory": 36824, "step": 145119}
{"lr": 0.0006554410494055593, "data_time": 0.003317570686340332, "grad_norm": 0.24407469481229782, "loss": 0.4751342684030533, "time": 1.85147123336792, "epoch": 464, "memory": 36824, "step": 145219}
{"lr": 0.0006542669846899241, "data_time": 0.2148587226867676, "grad_norm": 0.2779406994581223, "loss": 0.47383624911308286, "time": 1.7888034582138062, "epoch": 465, "memory": 36824, "step": 145332}
{"lr": 0.0006532282599846756, "data_time": 0.002969646453857422, "grad_norm": 0.25712215304374697, "loss": 0.4748957246541977, "time": 1.8298644542694091, "epoch": 465, "memory": 36824, "step": 145432}
{"lr": 0.0006521897912713517, "data_time": 0.0031574726104736327, "grad_norm": 0.2576817750930786, "loss": 0.4805552989244461, "time": 1.8013969659805298, "epoch": 465, "memory": 36824, "step": 145532}
{"lr": 0.0006510166319729574, "data_time": 0.2948632001876831, "grad_norm": 0.271375247836113, "loss": 0.47444626986980437, "time": 1.915090012550354, "epoch": 466, "memory": 36824, "step": 145645}
{"lr": 0.0006499787145595628, "data_time": 0.002624320983886719, "grad_norm": 0.3082675561308861, "loss": 0.47517317831516265, "time": 1.8238131999969482, "epoch": 466, "memory": 36824, "step": 145745}
{"lr": 0.0006489410588057888, "data_time": 0.0031888723373413087, "grad_norm": 0.257384829223156, "loss": 0.47522581219673155, "time": 1.8138810634613036, "epoch": 466, "memory": 36824, "step": 145845}
{"lr": 0.000647768824970691, "data_time": 0.12112133502960205, "grad_norm": 0.247425439953804, "loss": 0.4786089062690735, "time": 1.8008623123168945, "epoch": 467, "memory": 36824, "step": 145958}
{"lr": 0.0006467317325842963, "data_time": 0.002732586860656738, "grad_norm": 0.26545884758234023, "loss": 0.4744102478027344, "time": 1.862134838104248, "epoch": 467, "memory": 36824, "step": 146058}
{"lr": 0.0006456949075207439, "data_time": 0.003031349182128906, "grad_norm": 0.25229449272155763, "loss": 0.4731316417455673, "time": 1.8188563585281372, "epoch": 467, "memory": 36824, "step": 146158}
{"lr": 0.0006445236191791851, "data_time": 0.01492297649383545, "grad_norm": 0.27973886430263517, "loss": 0.47649777233600615, "time": 1.8310170650482178, "epoch": 468, "memory": 36824, "step": 146271}
{"lr": 0.0006434873695408373, "data_time": 0.0027724266052246093, "grad_norm": 0.2868026703596115, "loss": 0.4812163352966309, "time": 1.821889042854309, "epoch": 468, "memory": 36824, "step": 146371}
{"lr": 0.0006424513928839875, "data_time": 0.0038678884506225587, "grad_norm": 0.26075326055288317, "loss": 0.4770809829235077, "time": 1.7623523473739624, "epoch": 468, "memory": 36824, "step": 146471}
{"lr": 0.0006412810700500534, "data_time": 0.2675415277481079, "grad_norm": 0.2798639670014381, "loss": 0.4778867602348328, "time": 1.7737703800201416, "epoch": 469, "memory": 36824, "step": 146584}
{"lr": 0.0006402456808663996, "data_time": 0.002733612060546875, "grad_norm": 0.26130103766918183, "loss": 0.4723150789737701, "time": 1.847607183456421, "epoch": 469, "memory": 36824, "step": 146684}
{"lr": 0.0006392105703182348, "data_time": 0.003464150428771973, "grad_norm": 0.25405284613370893, "loss": 0.4783969521522522, "time": 1.7695746421813965, "epoch": 469, "memory": 36824, "step": 146784}
{"lr": 0.000638041232989514, "data_time": 0.13449769020080565, "grad_norm": 0.27061196118593217, "loss": 0.4782293140888214, "time": 1.8448129653930665, "epoch": 470, "memory": 36824, "step": 146897}
{"lr": 0.0006370067219524986, "data_time": 0.0029716014862060545, "grad_norm": 0.23581310212612153, "loss": 0.4755379259586334, "time": 1.8020873308181762, "epoch": 470, "memory": 36824, "step": 146997}
{"lr": 0.0006359724952002037, "data_time": 0.0034685373306274415, "grad_norm": 0.28622601479291915, "loss": 0.47861872911453246, "time": 1.8288964748382568, "epoch": 470, "memory": 36824, "step": 147097}
{"lr": 0.0006348041633574433, "data_time": 0.39784607887268064, "grad_norm": 0.28043781965970993, "loss": 0.48023908734321596, "time": 1.8253737211227417, "epoch": 471, "memory": 36824, "step": 147210}
{"lr": 0.0006337705481440061, "data_time": 0.0025762319564819336, "grad_norm": 0.25339234322309495, "loss": 0.47595297992229463, "time": 1.895935344696045, "epoch": 471, "memory": 36824, "step": 147310}
{"lr": 0.0006327372228596613, "data_time": 0.003027510643005371, "grad_norm": 0.25486989617347716, "loss": 0.47308037281036375, "time": 1.966382384300232, "epoch": 471, "memory": 36824, "step": 147410}
{"lr": 0.0006315699164664303, "data_time": 0.30538082122802734, "grad_norm": 0.2682279959321022, "loss": 0.4802525281906128, "time": 1.816183376312256, "epoch": 472, "memory": 36824, "step": 147523}
{"lr": 0.0006305372147382027, "data_time": 0.0026933908462524413, "grad_norm": 0.27536560893058776, "loss": 0.4769159764051437, "time": 1.8182016134262085, "epoch": 472, "memory": 36824, "step": 147623}
{"lr": 0.0006295048085784863, "data_time": 0.003588962554931641, "grad_norm": 0.2622136116027832, "loss": 0.48015818893909457, "time": 1.782317543029785, "epoch": 472, "memory": 36824, "step": 147723}
{"lr": 0.0006283385475808305, "data_time": 0.0026496410369873046, "grad_norm": 0.2507291704416275, "loss": 0.4790505707263947, "time": 1.836978793144226, "epoch": 473, "memory": 36824, "step": 147836}
{"lr": 0.0006273067769838371, "data_time": 0.0034786462783813477, "grad_norm": 0.25911329686641693, "loss": 0.4754845857620239, "time": 1.9043111324310302, "epoch": 473, "memory": 36824, "step": 147936}
{"lr": 0.0006262753075897217, "data_time": 0.0030535459518432617, "grad_norm": 0.2952033281326294, "loss": 0.4771797150373459, "time": 1.8265142917633057, "epoch": 473, "memory": 36824, "step": 148036}
{"lr": 0.0006251101119158241, "data_time": 0.1051070213317871, "grad_norm": 0.27482024729251864, "loss": 0.47358361184597014, "time": 1.8439740419387818, "epoch": 474, "memory": 36824, "step": 148149}
{"lr": 0.0006240792900801758, "data_time": 0.0032373905181884766, "grad_norm": 0.2583567798137665, "loss": 0.4695391446352005, "time": 1.8077313899993896, "epoch": 474, "memory": 36824, "step": 148249}
{"lr": 0.0006230487750766269, "data_time": 0.004076194763183594, "grad_norm": 0.2567055195569992, "loss": 0.47799037098884584, "time": 1.808624005317688, "epoch": 474, "memory": 36824, "step": 148349}
{"lr": 0.0006218846646364655, "data_time": 0.05218045711517334, "grad_norm": 0.27363676577806473, "loss": 0.4795142948627472, "time": 1.8120652914047242, "epoch": 475, "memory": 36824, "step": 148462}
{"lr": 0.0006208548091760649, "data_time": 0.0030789852142333986, "grad_norm": 0.25550137609243395, "loss": 0.4767702490091324, "time": 2.1679700136184694, "epoch": 475, "memory": 36824, "step": 148562}
{"lr": 0.0006198252661717409, "data_time": 0.0030551433563232424, "grad_norm": 0.26944657862186433, "loss": 0.47862173020839693, "time": 1.8703495740890503, "epoch": 475, "memory": 36824, "step": 148662}
{"lr": 0.000618662260856751, "data_time": 0.21253662109375, "grad_norm": 0.250193327665329, "loss": 0.4770085006952286, "time": 1.8465912818908692, "epoch": 476, "memory": 36824, "step": 148775}
{"lr": 0.0006176333893689853, "data_time": 0.002828240394592285, "grad_norm": 0.26371807754039767, "loss": 0.4778652638196945, "time": 1.7545698642730714, "epoch": 476, "memory": 36824, "step": 148875}
{"lr": 0.0006166048359559366, "data_time": 0.0039986371994018555, "grad_norm": 0.2601328819990158, "loss": 0.47921724021434786, "time": 1.734780764579773, "epoch": 476, "memory": 36824, "step": 148975}
{"lr": 0.0006154429556386704, "data_time": 0.10547385215759278, "grad_norm": 0.2531684786081314, "loss": 0.47546292245388033, "time": 1.8268545627593995, "epoch": 477, "memory": 36824, "step": 149088}
{"lr": 0.0006144150857041155, "data_time": 0.002854800224304199, "grad_norm": 0.248580501973629, "loss": 0.47552565932273866, "time": 1.9889571905136108, "epoch": 477, "memory": 36824, "step": 149188}
{"lr": 0.0006133875394574808, "data_time": 0.00356292724609375, "grad_norm": 0.2610825389623642, "loss": 0.47584607303142545, "time": 1.7931777477264403, "epoch": 477, "memory": 36824, "step": 149288}
{"lr": 0.0006122268039912677, "data_time": 0.10612576007843018, "grad_norm": 0.2618945062160492, "loss": 0.4801526218652725, "time": 1.7974273920059205, "epoch": 478, "memory": 36824, "step": 149401}
{"lr": 0.0006111999531733848, "data_time": 0.00278170108795166, "grad_norm": 0.26799133121967317, "loss": 0.47871247828006747, "time": 1.7769393920898438, "epoch": 478, "memory": 36824, "step": 149501}
{"lr": 0.0006101734316510956, "data_time": 0.002830314636230469, "grad_norm": 0.26983000338077545, "loss": 0.4741547882556915, "time": 1.7352127075195312, "epoch": 478, "memory": 36824, "step": 149601}
{"lr": 0.0006090138608697028, "data_time": 0.01871190071105957, "grad_norm": 0.27942556887865067, "loss": 0.4767181158065796, "time": 1.8692965269088746, "epoch": 479, "memory": 36824, "step": 149714}
{"lr": 0.0006079880467145382, "data_time": 0.0026164531707763674, "grad_norm": 0.2969937399029732, "loss": 0.4718552500009537, "time": 2.0938000679016113, "epoch": 479, "memory": 36824, "step": 149814}
{"lr": 0.0006069625674570141, "data_time": 0.0036164522171020508, "grad_norm": 0.2650099441409111, "loss": 0.47573797702789306, "time": 1.8093550205230713, "epoch": 479, "memory": 36824, "step": 149914}
{"lr": 0.0006058041811743064, "data_time": 0.42274956703186034, "grad_norm": 0.26221046447753904, "loss": 0.475673970580101, "time": 1.8559202432632447, "epoch": 480, "memory": 36824, "step": 150027}
{"lr": 0.0006047794212101945, "data_time": 0.0030147314071655275, "grad_norm": 0.2639245718717575, "loss": 0.4775840938091278, "time": 1.8475267171859742, "epoch": 480, "memory": 36824, "step": 150127}
{"lr": 0.0006037550017400463, "data_time": 0.003740525245666504, "grad_norm": 0.2602442651987076, "loss": 0.4760151356458664, "time": 1.8065075874328613, "epoch": 480, "memory": 36824, "step": 150227}
{"lr": 0.0006025978197496486, "data_time": 0.2997490406036377, "grad_norm": 0.26950850933790205, "loss": 0.4764342248439789, "time": 1.8281580924987793, "epoch": 481, "memory": 36824, "step": 150340}
{"lr": 0.0006015741314869088, "data_time": 0.0027141571044921875, "grad_norm": 0.24268489629030227, "loss": 0.4737463116645813, "time": 1.7881190061569214, "epoch": 481, "memory": 36824, "step": 150440}
{"lr": 0.0006005507893086396, "data_time": 0.0035121679306030274, "grad_norm": 0.28335586488246917, "loss": 0.4781321197748184, "time": 1.8818413972854615, "epoch": 481, "memory": 36824, "step": 150540}
{"lr": 0.0005993948313835983, "data_time": 0.33630549907684326, "grad_norm": 0.2657764941453934, "loss": 0.47737436890602114, "time": 1.8617972612380982, "epoch": 482, "memory": 36824, "step": 150653}
{"lr": 0.0005983722323142388, "data_time": 0.0025830507278442384, "grad_norm": 0.2797523304820061, "loss": 0.47919723987579343, "time": 1.8142767190933227, "epoch": 482, "memory": 36824, "step": 150753}
{"lr": 0.0005973499849139425, "data_time": 0.0030560731887817384, "grad_norm": 0.26132518202066424, "loss": 0.4773691326379776, "time": 1.861368465423584, "epoch": 482, "memory": 36824, "step": 150853}
{"lr": 0.0005961952708063913, "data_time": 0.00279691219329834, "grad_norm": 0.25137012302875517, "loss": 0.47557859122753143, "time": 1.8446999549865724, "epoch": 483, "memory": 36824, "step": 150966}
{"lr": 0.0005951737784038097, "data_time": 0.0030704498291015624, "grad_norm": 0.24062993973493577, "loss": 0.4746594697237015, "time": 1.83613178730011, "epoch": 483, "memory": 36824, "step": 151066}
{"lr": 0.0005941526432488741, "data_time": 0.0040130615234375, "grad_norm": 0.27605346143245696, "loss": 0.47669441998004913, "time": 1.8266523122787475, "epoch": 483, "memory": 36824, "step": 151166}
{"lr": 0.0005929991926896909, "data_time": 0.5527269601821899, "grad_norm": 0.24488908648490906, "loss": 0.4795553147792816, "time": 1.8257141590118409, "epoch": 484, "memory": 36824, "step": 151279}
{"lr": 0.0005919788244083737, "data_time": 0.0026156425476074217, "grad_norm": 0.24655521661043167, "loss": 0.47646473050117494, "time": 1.8216100215911866, "epoch": 484, "memory": 36824, "step": 151379}
{"lr": 0.0005909588189471822, "data_time": 0.0029306411743164062, "grad_norm": 0.2868609860539436, "loss": 0.4784042775630951, "time": 1.7787392139434814, "epoch": 484, "memory": 36824, "step": 151479}
{"lr": 0.0005898066516456537, "data_time": 0.17815711498260497, "grad_norm": 0.2715705022215843, "loss": 0.47794600427150724, "time": 1.821051025390625, "epoch": 485, "memory": 36824, "step": 151592}
{"lr": 0.0005887874249208802, "data_time": 0.0027111530303955077, "grad_norm": 0.2449597269296646, "loss": 0.48081627786159514, "time": 1.8627847194671632, "epoch": 485, "memory": 36824, "step": 151692}
{"lr": 0.0005877685665825103, "data_time": 0.004413986206054687, "grad_norm": 0.2664750650525093, "loss": 0.4768296480178833, "time": 1.8909437656402588, "epoch": 485, "memory": 36824, "step": 151792}
{"lr": 0.0005866177022259973, "data_time": 0.2796390771865845, "grad_norm": 0.2479274496436119, "loss": 0.47649594247341154, "time": 1.8426161766052247, "epoch": 486, "memory": 36824, "step": 151905}
{"lr": 0.0005855996344735387, "data_time": 0.002938723564147949, "grad_norm": 0.27982947081327436, "loss": 0.4746179640293121, "time": 1.8889891624450683, "epoch": 486, "memory": 36824, "step": 152005}
{"lr": 0.0005845819406674689, "data_time": 0.003848719596862793, "grad_norm": 0.25612316876649854, "loss": 0.47540155351161956, "time": 1.940381145477295, "epoch": 486, "memory": 36824, "step": 152105}
{"lr": 0.0005834323989210676, "data_time": 0.26795828342437744, "grad_norm": 0.2625120311975479, "loss": 0.4772220253944397, "time": 1.8300596237182618, "epoch": 487, "memory": 36824, "step": 152218}
{"lr": 0.0005824155075368935, "data_time": 0.0029393672943115235, "grad_norm": 0.2660481512546539, "loss": 0.4741062492132187, "time": 1.8131548881530761, "epoch": 487, "memory": 36824, "step": 152318}
{"lr": 0.0005813989956527025, "data_time": 0.0031275510787963866, "grad_norm": 0.2746216386556625, "loss": 0.4765567034482956, "time": 1.7952574014663696, "epoch": 487, "memory": 36824, "step": 152418}
{"lr": 0.0005802507961589102, "data_time": 0.19625766277313234, "grad_norm": 0.2732161104679108, "loss": 0.47544704377651215, "time": 1.8095108270645142, "epoch": 488, "memory": 36824, "step": 152531}
{"lr": 0.000579235098518888, "data_time": 0.0027985572814941406, "grad_norm": 0.24708013087511063, "loss": 0.47480535209178926, "time": 1.8138251066207887, "epoch": 488, "memory": 36824, "step": 152631}
{"lr": 0.0005782197859259602, "data_time": 0.003656339645385742, "grad_norm": 0.26206204295158386, "loss": 0.4776599794626236, "time": 1.8374907732009889, "epoch": 488, "memory": 36824, "step": 152731}
{"lr": 0.0005770729483043378, "data_time": 0.13704185485839843, "grad_norm": 0.23615767657756806, "loss": 0.4787562847137451, "time": 1.7903201103210449, "epoch": 489, "memory": 36824, "step": 152844}
{"lr": 0.000576058461763938, "data_time": 0.0028883695602416994, "grad_norm": 0.2641646355390549, "loss": 0.4754114508628845, "time": 1.8652998685836792, "epoch": 489, "memory": 36824, "step": 152944}
{"lr": 0.0005750443658111644, "data_time": 0.003106212615966797, "grad_norm": 0.2686169818043709, "loss": 0.47504072189331054, "time": 1.809622836112976, "epoch": 489, "memory": 36824, "step": 153044}
{"lr": 0.0005738989096580023, "data_time": 0.21830463409423828, "grad_norm": 0.2830421686172485, "loss": 0.47334009408950806, "time": 1.8752363443374633, "epoch": 490, "memory": 36824, "step": 153157}
{"lr": 0.000572885651552001, "data_time": 0.002695512771606445, "grad_norm": 0.27228970229625704, "loss": 0.4759268671274185, "time": 1.9089969396591187, "epoch": 490, "memory": 36824, "step": 153257}
{"lr": 0.0005718727895674817, "data_time": 0.0030231475830078125, "grad_norm": 0.28866434544324876, "loss": 0.47342746555805204, "time": 1.9384355306625367, "epoch": 490, "memory": 36824, "step": 153357}
{"lr": 0.0005707287344554653, "data_time": 0.002577352523803711, "grad_norm": 0.2457954317331314, "loss": 0.4756885260343552, "time": 1.809970450401306, "epoch": 491, "memory": 36824, "step": 153470}
{"lr": 0.0005697167220976486, "data_time": 0.0026900053024291994, "grad_norm": 0.2654979214072227, "loss": 0.4785579204559326, "time": 1.9016629934310914, "epoch": 491, "memory": 36824, "step": 153570}
{"lr": 0.0005687051113884009, "data_time": 0.0031146764755249023, "grad_norm": 0.24848369508981705, "loss": 0.4760664016008377, "time": 1.8981022357940673, "epoch": 491, "memory": 36824, "step": 153670}
{"lr": 0.0005675624768662754, "data_time": 0.20748472213745117, "grad_norm": 0.2667399749159813, "loss": 0.4726444274187088, "time": 1.7806211471557618, "epoch": 492, "memory": 36824, "step": 153783}
{"lr": 0.0005665517275491413, "data_time": 0.002658653259277344, "grad_norm": 0.2511359125375748, "loss": 0.4752752512693405, "time": 1.7537097930908203, "epoch": 492, "memory": 36824, "step": 153883}
{"lr": 0.0005655413854008019, "data_time": 0.0035482168197631834, "grad_norm": 0.29171051681041715, "loss": 0.47837737798690794, "time": 2.2766236066818237, "epoch": 492, "memory": 36824, "step": 153983}
{"lr": 0.0005644001909930398, "data_time": 0.23867266178131102, "grad_norm": 0.2703396677970886, "loss": 0.4781782180070877, "time": 1.8012959003448485, "epoch": 493, "memory": 36824, "step": 154096}
{"lr": 0.0005633907219875047, "data_time": 0.003654956817626953, "grad_norm": 0.2853998914361, "loss": 0.4781800299882889, "time": 1.8138999462127685, "epoch": 493, "memory": 36824, "step": 154196}
{"lr": 0.0005623816656640339, "data_time": 0.0030796051025390623, "grad_norm": 0.2773993954062462, "loss": 0.4741984248161316, "time": 1.874470090866089, "epoch": 493, "memory": 36824, "step": 154296}
{"lr": 0.000561241930870498, "data_time": 0.4376210689544678, "grad_norm": 0.25813395977020265, "loss": 0.4753832548856735, "time": 1.819179081916809, "epoch": 494, "memory": 36824, "step": 154409}
{"lr": 0.0005602337594256044, "data_time": 0.3464935779571533, "grad_norm": 0.24824576675891877, "loss": 0.47767171561717986, "time": 1.8481786727905274, "epoch": 494, "memory": 36824, "step": 154509}
{"lr": 0.0005592260061689891, "data_time": 0.004253077507019043, "grad_norm": 0.2474662944674492, "loss": 0.4789738833904266, "time": 1.8196946620941161, "epoch": 494, "memory": 36824, "step": 154609}
{"lr": 0.0005580877504646018, "data_time": 0.3647473812103271, "grad_norm": 0.30332507491111754, "loss": 0.4802066504955292, "time": 2.1123025894165037, "epoch": 495, "memory": 36824, "step": 154722}
{"lr": 0.0005570808938072159, "data_time": 0.0030409574508666994, "grad_norm": 0.29216694831848145, "loss": 0.472135454416275, "time": 1.8597232818603515, "epoch": 495, "memory": 36824, "step": 154822}
{"lr": 0.0005560744608371803, "data_time": 0.003618001937866211, "grad_norm": 0.24945644587278365, "loss": 0.48094100058078765, "time": 1.7696412563323975, "epoch": 495, "memory": 36824, "step": 154922}
{"lr": 0.0005549377036715917, "data_time": 0.32992026805877683, "grad_norm": 0.28499314188957214, "loss": 0.4720092236995697, "time": 1.9134241819381714, "epoch": 496, "memory": 36824, "step": 155035}
{"lr": 0.0005539321790061167, "data_time": 0.002588987350463867, "grad_norm": Infinity, "loss": 0.47267802357673644, "time": 1.8197921991348267, "epoch": 496, "memory": 36824, "step": 155135}
{"lr": 0.0005529270835198221, "data_time": 0.0030696868896484377, "grad_norm": 0.24541741609573364, "loss": 0.47911728620529176, "time": 1.8321736097335815, "epoch": 496, "memory": 36824, "step": 155235}
{"lr": 0.0005517918443170753, "data_time": 0.03309898376464844, "grad_norm": 0.27635050266981126, "loss": 0.4758836358785629, "time": 1.9354610919952393, "epoch": 497, "memory": 36824, "step": 155348}
{"lr": 0.0005507876688251526, "data_time": 0.0026598453521728517, "grad_norm": 0.2622344642877579, "loss": 0.47455461919307707, "time": 1.8386106967926026, "epoch": 497, "memory": 36824, "step": 155448}
{"lr": 0.0005497839279969085, "data_time": 0.0029846668243408204, "grad_norm": 0.250193840265274, "loss": 0.4760705828666687, "time": 2.125657558441162, "epoch": 497, "memory": 36824, "step": 155548}
{"lr": 0.0005486502261551103, "data_time": 0.0028130292892456056, "grad_norm": 0.26629614681005476, "loss": 0.4813858181238174, "time": 1.8347516059875488, "epoch": 498, "memory": 36824, "step": 155661}
{"lr": 0.0005476474169953297, "data_time": 0.002707648277282715, "grad_norm": 0.284282249212265, "loss": 0.4757335245609283, "time": 1.860050630569458, "epoch": 498, "memory": 36824, "step": 155761}
{"lr": 0.0005466450479762966, "data_time": 0.0038179636001586916, "grad_norm": 0.2629920616745949, "loss": 0.4741183310747147, "time": 1.8529399394989015, "epoch": 498, "memory": 36824, "step": 155861}
{"lr": 0.0005455129028672823, "data_time": 0.5314297437667846, "grad_norm": 0.26313606798648836, "loss": 0.4751108378171921, "time": 2.138470435142517, "epoch": 499, "memory": 36824, "step": 155974}
{"lr": 0.000544511477174884, "data_time": 0.002726173400878906, "grad_norm": 0.3051550105214119, "loss": 0.47873041927814486, "time": 1.791356611251831, "epoch": 499, "memory": 36824, "step": 156074}
{"lr": 0.0005435104970927835, "data_time": 0.003688549995422363, "grad_norm": 0.25723263919353484, "loss": 0.4780588150024414, "time": 1.8439898490905762, "epoch": 499, "memory": 36824, "step": 156174}
{"lr": 0.0005423799280617905, "data_time": 0.0025847911834716796, "grad_norm": 0.25384562015533446, "loss": 0.4758119970560074, "time": 1.8397055864334106, "epoch": 500, "memory": 36824, "step": 156287}
{"lr": 0.0005413799029483782, "data_time": 0.002919578552246094, "grad_norm": 0.2702990785241127, "loss": 0.4778221219778061, "time": 1.8682271718978882, "epoch": 500, "memory": 36824, "step": 156387}
{"lr": 0.0005403803289071975, "data_time": 0.0035544395446777343, "grad_norm": 0.2777388125658035, "loss": 0.47497749626636504, "time": 1.871880340576172, "epoch": 500, "memory": 36824, "step": 156487}
{"lr": 0.0005392513552725302, "data_time": 0.08124477863311767, "grad_norm": 0.274552495777607, "loss": 0.4739195376634598, "time": 1.850379967689514, "epoch": 501, "memory": 36824, "step": 156600}
{"lr": 0.0005382527478257724, "data_time": 0.002707815170288086, "grad_norm": 0.2662373721599579, "loss": 0.47325519323348997, "time": 1.7872830867767333, "epoch": 501, "memory": 36824, "step": 156700}
{"lr": 0.0005372545969054745, "data_time": 0.0030478477478027345, "grad_norm": 0.26822466850280763, "loss": 0.4722421169281006, "time": 1.7608704090118408, "epoch": 501, "memory": 36824, "step": 156800}
{"lr": 0.0005361272379581784, "data_time": 0.33116564750671384, "grad_norm": 0.26513188481330874, "loss": 0.4769819527864456, "time": 1.8491237878799438, "epoch": 502, "memory": 36824, "step": 156913}
{"lr": 0.0005351300652415217, "data_time": 0.0027559995651245117, "grad_norm": 0.26833119690418245, "loss": 0.47743827998638155, "time": 1.8069379329681396, "epoch": 502, "memory": 36824, "step": 157013}
{"lr": 0.0005341333544977522, "data_time": 0.003606367111206055, "grad_norm": 0.28891257792711256, "loss": 0.4733560800552368, "time": 1.7822869777679444, "epoch": 502, "memory": 36824, "step": 157113}
{"lr": 0.0005330076295012806, "data_time": 0.2614925384521484, "grad_norm": 0.3056912004947662, "loss": 0.4740736722946167, "time": 1.8132774591445924, "epoch": 503, "memory": 36824, "step": 157226}
{"lr": 0.000532011908553656, "data_time": 0.0028104066848754885, "grad_norm": 0.2553567856550217, "loss": 0.4771213173866272, "time": 1.8154529571533202, "epoch": 503, "memory": 36824, "step": 157326}
{"lr": 0.0005310166550174509, "data_time": 0.003153061866760254, "grad_norm": 0.27095659226179125, "loss": 0.4778751194477081, "time": 1.8204784393310547, "epoch": 503, "memory": 36824, "step": 157426}
{"lr": 0.0005298925832073373, "data_time": 0.0024976253509521483, "grad_norm": 0.27128231823444365, "loss": 0.4765349417924881, "time": 1.8223272562026978, "epoch": 504, "memory": 36824, "step": 157539}
{"lr": 0.0005288983310428679, "data_time": 0.0030066728591918944, "grad_norm": 0.31038259863853457, "loss": 0.4736957550048828, "time": 1.7848949670791625, "epoch": 504, "memory": 36824, "step": 157639}
{"lr": 0.0005279045517203643, "data_time": 0.0031035900115966796, "grad_norm": 0.2758112519979477, "loss": 0.47897411584854127, "time": 1.778972578048706, "epoch": 504, "memory": 36824, "step": 157739}
{"lr": 0.0005267821523038955, "data_time": 0.47341964244842527, "grad_norm": 0.26958525031805036, "loss": 0.4742352575063705, "time": 1.786156988143921, "epoch": 505, "memory": 36824, "step": 157852}
{"lr": 0.0005257893859116081, "data_time": 0.0076816797256469725, "grad_norm": 0.2839749425649643, "loss": 0.47483948767185213, "time": 1.7759109497070313, "epoch": 505, "memory": 36824, "step": 157952}
{"lr": 0.0005247970977837526, "data_time": 0.0036045312881469727, "grad_norm": 0.2728490084409714, "loss": 0.47748484909534455, "time": 1.7716163635253905, "epoch": 505, "memory": 36824, "step": 158052}
{"lr": 0.0005236763899396373, "data_time": 0.3644952058792114, "grad_norm": 0.2674153074622154, "loss": 0.47448591887950897, "time": 1.8336344003677367, "epoch": 506, "memory": 36824, "step": 158165}
{"lr": 0.0005226851262831694, "data_time": 0.002842259407043457, "grad_norm": 0.2832582533359528, "loss": 0.4843310296535492, "time": 1.778447699546814, "epoch": 506, "memory": 36824, "step": 158265}
{"lr": 0.0005216943463054284, "data_time": 0.002893662452697754, "grad_norm": 0.2722241312265396, "loss": 0.4763665795326233, "time": 1.782872724533081, "epoch": 506, "memory": 36824, "step": 158365}
{"lr": 0.000520575349183472, "data_time": 0.3410356521606445, "grad_norm": 0.2743480995297432, "loss": 0.4737991988658905, "time": 1.8104553937911987, "epoch": 507, "memory": 36824, "step": 158478}
{"lr": 0.0005195856052007855, "data_time": 0.0029520273208618166, "grad_norm": 0.2574037492275238, "loss": 0.47138839960098267, "time": 1.7479170560836792, "epoch": 507, "memory": 36824, "step": 158578}
{"lr": 0.0005185963503028553, "data_time": 0.0030388832092285156, "grad_norm": 0.2660225570201874, "loss": 0.47645453214645384, "time": 1.8216747045516968, "epoch": 507, "memory": 36824, "step": 158678}
{"lr": 0.0005174790830236311, "data_time": 0.2148965120315552, "grad_norm": 0.2644634023308754, "loss": 0.4757453590631485, "time": 2.0550489902496336, "epoch": 508, "memory": 36824, "step": 158791}
{"lr": 0.0005164908756267192, "data_time": 0.0027135610580444336, "grad_norm": 0.26291900426149367, "loss": 0.4762988418340683, "time": 1.8132398128509521, "epoch": 508, "memory": 36824, "step": 158891}
{"lr": 0.0005155031627122357, "data_time": 0.0035954713821411133, "grad_norm": 0.25979765951633454, "loss": 0.4741836488246918, "time": 1.8079806327819825, "epoch": 508, "memory": 36824, "step": 158991}
{"lr": 0.0005143876443667587, "data_time": 0.08687353134155273, "grad_norm": 0.279993961751461, "loss": 0.47765201330184937, "time": 1.8043660402297974, "epoch": 509, "memory": 36824, "step": 159104}
{"lr": 0.0005134009904413606, "data_time": 0.0029956817626953123, "grad_norm": 0.27818574011325836, "loss": 0.47311738431453704, "time": 1.874069595336914, "epoch": 509, "memory": 36824, "step": 159204}
{"lr": 0.000512414836387613, "data_time": 0.0029985666275024413, "grad_norm": 0.3003753304481506, "loss": 0.4710966140031815, "time": 1.9321847915649415, "epoch": 509, "memory": 36824, "step": 159304}
{"lr": 0.0005113010860370139, "data_time": 0.49721264839172363, "grad_norm": 0.2570097655057907, "loss": 0.4799197196960449, "time": 1.7925386905670166, "epoch": 510, "memory": 36824, "step": 159417}
{"lr": 0.0005103160024423217, "data_time": 0.16923730373382567, "grad_norm": 0.2807843655347824, "loss": 0.4699270576238632, "time": 1.9648121833801269, "epoch": 510, "memory": 36824, "step": 159517}
{"lr": 0.0005093314240999622, "data_time": 0.0038540124893188476, "grad_norm": 0.2714665040373802, "loss": 0.4765081614255905, "time": 1.8307100772857665, "epoch": 510, "memory": 36824, "step": 159617}
{"lr": 0.0005082194607751604, "data_time": 0.20002446174621583, "grad_norm": 0.2838670119643211, "loss": 0.4772853970527649, "time": 1.7734345197677612, "epoch": 511, "memory": 36824, "step": 159730}
{"lr": 0.0005072359643435341, "data_time": 0.0026642322540283204, "grad_norm": 0.28377043455839157, "loss": 0.47736045718193054, "time": 1.813809370994568, "epoch": 511, "memory": 36824, "step": 159830}
{"lr": 0.0005062529785362892, "data_time": 0.003442049026489258, "grad_norm": 0.25824886560440063, "loss": 0.47555074989795687, "time": 1.8434817790985107, "epoch": 511, "memory": 36824, "step": 159930}
{"lr": 0.0005051428212376718, "data_time": 0.0028352260589599608, "grad_norm": 0.2801157683134079, "loss": 0.47565323412418364, "time": 1.7749516725540162, "epoch": 512, "memory": 36824, "step": 160043}
{"lr": 0.0005041609287743528, "data_time": 0.0026316404342651366, "grad_norm": 0.29222433269023895, "loss": 0.4759056121110916, "time": 1.8066681385040284, "epoch": 512, "memory": 36824, "step": 160143}
{"lr": 0.0005031795522987387, "data_time": 0.003027606010437012, "grad_norm": 0.2954046666622162, "loss": 0.4777134358882904, "time": 1.7746588945388795, "epoch": 512, "memory": 36824, "step": 160243}
{"lr": 0.0005020712199958301, "data_time": 0.1446469783782959, "grad_norm": 0.2809794843196869, "loss": 0.4793731063604355, "time": 1.856548285484314, "epoch": 513, "memory": 36824, "step": 160356}
{"lr": 0.0005010909482786514, "data_time": 0.003045964241027832, "grad_norm": 0.2932552918791771, "loss": 0.477404859662056, "time": 1.8513891458511353, "epoch": 513, "memory": 36824, "step": 160456}
{"lr": 0.0005001111979036843, "data_time": 0.002953791618347168, "grad_norm": 0.2845895975828171, "loss": 0.47501630187034605, "time": 1.83493070602417, "epoch": 513, "memory": 36824, "step": 160556}
{"lr": 0.0004990047095348254, "data_time": 0.13770172595977784, "grad_norm": 0.28988664597272873, "loss": 0.476766312122345, "time": 1.8255449771881103, "epoch": 514, "memory": 36824, "step": 160669}
{"lr": 0.0004980260753139258, "data_time": 0.002816033363342285, "grad_norm": 0.27927435636520387, "loss": 0.4808007389307022, "time": 1.7027937173843384, "epoch": 514, "memory": 36824, "step": 160769}
{"lr": 0.0004970479677808354, "data_time": 0.0029013395309448243, "grad_norm": 0.2907367005944252, "loss": 0.4780228793621063, "time": 1.7606521129608155, "epoch": 514, "memory": 36824, "step": 160869}
{"lr": 0.0004959433422528599, "data_time": 0.27875754833221433, "grad_norm": 0.2881035953760147, "loss": 0.4766349881887436, "time": 1.819187879562378, "epoch": 515, "memory": 36824, "step": 160982}
{"lr": 0.0004949663622503995, "data_time": 0.002642369270324707, "grad_norm": 0.27522737979888917, "loss": 0.47462581992149355, "time": 1.710511827468872, "epoch": 515, "memory": 36824, "step": 161082}
{"lr": 0.0004939899142723458, "data_time": 0.0030797004699707033, "grad_norm": 0.24406778663396836, "loss": 0.4771204054355621, "time": 1.8739780187606812, "epoch": 515, "memory": 36824, "step": 161182}
{"lr": 0.0004928871704602564, "data_time": 0.10889303684234619, "grad_norm": 0.3043875157833099, "loss": 0.47864271998405455, "time": 1.7972241640090942, "epoch": 516, "memory": 36824, "step": 161295}
{"lr": 0.0004919118613701251, "data_time": 0.002730274200439453, "grad_norm": 0.29253980666399004, "loss": 0.4760801911354065, "time": 1.8609049797058106, "epoch": 516, "memory": 36824, "step": 161395}
{"lr": 0.0004909370896319095, "data_time": 0.003812289237976074, "grad_norm": 0.24834232777357101, "loss": 0.4736328512430191, "time": 1.97597336769104, "epoch": 516, "memory": 36824, "step": 161495}
{"lr": 0.0004898362463785547, "data_time": 0.002614903450012207, "grad_norm": 0.297034764289856, "loss": 0.4763505131006241, "time": 1.8370972394943237, "epoch": 517, "memory": 36824, "step": 161608}
{"lr": 0.0004888626248660966, "data_time": 0.0031261205673217773, "grad_norm": 0.2696431875228882, "loss": 0.47268244028091433, "time": 1.8282767295837403, "epoch": 517, "memory": 36824, "step": 161708}
{"lr": 0.0004878895460238786, "data_time": 0.0030652284622192383, "grad_norm": 0.28689204901456833, "loss": 0.47168256640434264, "time": 1.7225566387176514, "epoch": 517, "memory": 36824, "step": 161808}
{"lr": 0.0004867906221396325, "data_time": 0.3152590036392212, "grad_norm": 0.3155015140771866, "loss": 0.47654456198215484, "time": 1.8573760986328125, "epoch": 518, "memory": 36824, "step": 161921}
{"lr": 0.00048581870484135087, "data_time": 0.002834773063659668, "grad_norm": 0.2836906731128693, "loss": 0.46779236793518064, "time": 1.756091833114624, "epoch": 518, "memory": 36824, "step": 162021}
{"lr": 0.000484847335522363, "data_time": 0.0036679983139038088, "grad_norm": 0.2805831581354141, "loss": 0.47612652480602263, "time": 2.2221815824508666, "epoch": 518, "memory": 36824, "step": 162121}
{"lr": 0.0004837503497848027, "data_time": 0.42645955085754395, "grad_norm": 0.2932167246937752, "loss": 0.4736118495464325, "time": 1.8767351388931275, "epoch": 519, "memory": 36824, "step": 162234}
{"lr": 0.0004827801533080835, "data_time": 0.003122687339782715, "grad_norm": 0.2737408623099327, "loss": 0.4771709769964218, "time": 1.8949090003967286, "epoch": 519, "memory": 36824, "step": 162334}
{"lr": 0.00048181051011034795, "data_time": 0.003225421905517578, "grad_norm": 0.2924145430326462, "loss": 0.4785519242286682, "time": 1.8485027313232423, "epoch": 519, "memory": 36824, "step": 162434}
{"lr": 0.0004807154812639334, "data_time": 0.23118290901184083, "grad_norm": 0.25906546860933305, "loss": 0.4780913472175598, "time": 1.8817082643508911, "epoch": 520, "memory": 36824, "step": 162547}
{"lr": 0.0004797470221867575, "data_time": 0.002848529815673828, "grad_norm": 0.25599356144666674, "loss": 0.47777454257011415, "time": 1.819270658493042, "epoch": 520, "memory": 36824, "step": 162647}
{"lr": 0.0004787791216788005, "data_time": 0.004047584533691406, "grad_norm": 0.2847966194152832, "loss": 0.47422048151493074, "time": 2.158247232437134, "epoch": 520, "memory": 36824, "step": 162747}
{"lr": 0.00047768606843455306, "data_time": 0.4433659315109253, "grad_norm": 0.29999587684869766, "loss": 0.473056834936142, "time": 1.8896317720413207, "epoch": 521, "memory": 36824, "step": 162860}
{"lr": 0.0004767193633052144, "data_time": 0.002638435363769531, "grad_norm": 0.2713018074631691, "loss": 0.4761017858982086, "time": 1.8800551652908326, "epoch": 521, "memory": 36824, "step": 162960}
{"lr": 0.00047575322202578554, "data_time": 0.0028462886810302736, "grad_norm": 0.2951666280627251, "loss": 0.474238795042038, "time": 1.8779545068740844, "epoch": 521, "memory": 36824, "step": 163060}
{"lr": 0.0004746621630609696, "data_time": 0.34365718364715575, "grad_norm": 0.286071865260601, "loss": 0.4770855814218521, "time": 1.9183747529983521, "epoch": 522, "memory": 36824, "step": 163173}
{"lr": 0.0004736972283977906, "data_time": 0.0029456615447998047, "grad_norm": 0.2911667451262474, "loss": 0.4735236257314682, "time": 1.9179621696472169, "epoch": 522, "memory": 36824, "step": 163273}
{"lr": 0.00047273286285557823, "data_time": 0.0036364078521728517, "grad_norm": 0.26903605461120605, "loss": 0.47679795026779176, "time": 1.9245811223983764, "epoch": 522, "memory": 36824, "step": 163373}
{"lr": 0.00047164381681338104, "data_time": 0.0027612924575805666, "grad_norm": 0.2705685213208199, "loss": 0.4770344167947769, "time": 1.8924628734588622, "epoch": 523, "memory": 36824, "step": 163486}
{"lr": 0.0004706806691044338, "data_time": 0.0027617216110229492, "grad_norm": 0.2776452645659447, "loss": 0.47250063717365265, "time": 1.8783803939819337, "epoch": 523, "memory": 36824, "step": 163586}
{"lr": 0.0004697180957777843, "data_time": 0.0035228729248046875, "grad_norm": 0.28018796592950823, "loss": 0.47448280453681946, "time": 1.9575575351715089, "epoch": 523, "memory": 36824, "step": 163686}
{"lr": 0.00046863108126699797, "data_time": 0.42794346809387207, "grad_norm": 0.27463153600692747, "loss": 0.4766549915075302, "time": 1.947228693962097, "epoch": 524, "memory": 36824, "step": 163799}
{"lr": 0.00046766973696981903, "data_time": 0.0024484634399414063, "grad_norm": 0.27629473656415937, "loss": 0.47676214277744294, "time": 1.890842866897583, "epoch": 524, "memory": 36824, "step": 163899}
{"lr": 0.0004667089723064554, "data_time": 0.003066730499267578, "grad_norm": 0.2942787066102028, "loss": 0.47915984988212584, "time": 1.8633641242980956, "epoch": 524, "memory": 36824, "step": 163999}
{"lr": 0.0004656240079011579, "data_time": 0.246038556098938, "grad_norm": 0.27697137743234634, "loss": 0.4739941835403442, "time": 1.8818383455276488, "epoch": 525, "memory": 36824, "step": 164112}
{"lr": 0.00046466448344246834, "data_time": 0.0027916431427001953, "grad_norm": 0.28489588350057604, "loss": 0.47780426442623136, "time": 1.9754776000976562, "epoch": 525, "memory": 36824, "step": 164212}
{"lr": 0.0004637055438592075, "data_time": 0.0032926082611083986, "grad_norm": 0.257710824906826, "loss": 0.47379578948020934, "time": 1.8762965440750121, "epoch": 525, "memory": 36824, "step": 164312}
{"lr": 0.0004626226480984481, "data_time": 0.11091394424438476, "grad_norm": 0.2938148856163025, "loss": 0.47332372665405276, "time": 1.8783145904541017, "epoch": 526, "memory": 36824, "step": 164425}
{"lr": 0.00046166495987387394, "data_time": 0.002545452117919922, "grad_norm": 0.28910419791936875, "loss": 0.475205597281456, "time": 2.120392942428589, "epoch": 526, "memory": 36824, "step": 164525}
{"lr": 0.0004607078617563475, "data_time": 0.002775406837463379, "grad_norm": 0.289199723303318, "loss": 0.4754611223936081, "time": 2.4291425466537477, "epoch": 526, "memory": 36824, "step": 164625}
{"lr": 0.0004596270531438264, "data_time": 0.02511258125305176, "grad_norm": 0.2950010120868683, "loss": 0.4795080691576004, "time": 1.8538218021392823, "epoch": 527, "memory": 36824, "step": 164738}
{"lr": 0.0004586712175176175, "data_time": 0.0026789188385009767, "grad_norm": 0.3148882672190666, "loss": 0.4790179580450058, "time": 1.8888391494750976, "epoch": 527, "memory": 36824, "step": 164838}
{"lr": 0.00045771597721999094, "data_time": 0.0030423641204833985, "grad_norm": 0.27275444120168685, "loss": 0.47432574927806853, "time": 1.9273500919342041, "epoch": 527, "memory": 36824, "step": 164938}
{"lr": 0.0004566372742237455, "data_time": 0.08190455436706542, "grad_norm": 0.26423788219690325, "loss": 0.4738211452960968, "time": 1.9205652236938477, "epoch": 528, "memory": 36824, "step": 165051}
{"lr": 0.00045568330752849455, "data_time": 0.0027332544326782227, "grad_norm": 0.28178553879261015, "loss": 0.4777326762676239, "time": 1.7885413646697998, "epoch": 528, "memory": 36824, "step": 165151}
{"lr": 0.0004547299413731893, "data_time": 0.0029421567916870115, "grad_norm": 0.2674506813287735, "loss": 0.47274188995361327, "time": 1.952996516227722, "epoch": 528, "memory": 36824, "step": 165251}
{"lr": 0.00045365336242527793, "data_time": 0.13707218170166016, "grad_norm": 0.2592381343245506, "loss": 0.4736932247877121, "time": 1.9079413414001465, "epoch": 529, "memory": 36824, "step": 165364}
{"lr": 0.00045270128096164426, "data_time": 0.0026799440383911133, "grad_norm": 0.26404818147420883, "loss": 0.47875631749629977, "time": 1.9996716260910035, "epoch": 529, "memory": 36824, "step": 165464}
{"lr": 0.00045174980523905815, "data_time": 0.0029210567474365233, "grad_norm": 0.28191332519054413, "loss": 0.4788811594247818, "time": 1.9171878337860107, "epoch": 529, "memory": 36824, "step": 165564}
{"lr": 0.0004506753687352435, "data_time": 0.0025180578231811523, "grad_norm": 0.29960031509399415, "loss": 0.47357864677906036, "time": 2.268267369270325, "epoch": 530, "memory": 36824, "step": 165677}
{"lr": 0.0004497251887716728, "data_time": 0.0025913238525390623, "grad_norm": 0.28121372163295744, "loss": 0.47596087157726286, "time": 1.840859580039978, "epoch": 530, "memory": 36824, "step": 165777}
{"lr": 0.0004487756197399004, "data_time": 0.0030262231826782226, "grad_norm": 0.28296193182468415, "loss": 0.4763704925775528, "time": 1.9233962535858153, "epoch": 530, "memory": 36824, "step": 165877}
{"lr": 0.0004477033440393389, "data_time": 0.0025712013244628905, "grad_norm": 0.28082297593355177, "loss": 0.47328412234783174, "time": 1.8596377611160277, "epoch": 531, "memory": 36824, "step": 165990}
{"lr": 0.0004467550818117844, "data_time": 0.002410888671875, "grad_norm": 0.28537911623716355, "loss": 0.4719033777713776, "time": 1.8511821269989013, "epoch": 531, "memory": 36824, "step": 166090}
{"lr": 0.0004458074356963402, "data_time": 0.0029134273529052733, "grad_norm": 0.30255533903837206, "loss": 0.473774915933609, "time": 1.8694649696350099, "epoch": 531, "memory": 36824, "step": 166190}
{"lr": 0.00044473733912126515, "data_time": 0.3290150880813599, "grad_norm": 0.3001902520656586, "loss": 0.47737071514129636, "time": 1.8857256650924683, "epoch": 532, "memory": 36824, "step": 166303}
{"lr": 0.00044379101083291206, "data_time": 0.003182864189147949, "grad_norm": 0.27698233276605605, "loss": 0.4736012667417526, "time": 1.8713831901550293, "epoch": 532, "memory": 36824, "step": 166403}
{"lr": 0.0004428453038264537, "data_time": 0.003442549705505371, "grad_norm": Infinity, "loss": 0.47310749888420106, "time": 1.8956368446350098, "epoch": 532, "memory": 36824, "step": 166503}
{"lr": 0.00044177740466186193, "data_time": 0.14967052936553954, "grad_norm": 0.30671258121728895, "loss": 0.4808221310377121, "time": 1.8551599740982057, "epoch": 533, "memory": 36824, "step": 166616}
{"lr": 0.00044083302648284956, "data_time": 0.002655172348022461, "grad_norm": 0.28090524673461914, "loss": 0.47654486298561094, "time": 1.9673047304153441, "epoch": 533, "memory": 36824, "step": 166716}
{"lr": 0.00043988927474490055, "data_time": 0.002962684631347656, "grad_norm": 0.33396745324134824, "loss": 0.4752041041851044, "time": 1.855766487121582, "epoch": 533, "memory": 36824, "step": 166816}
{"lr": 0.0004388235912382436, "data_time": 0.02724151611328125, "grad_norm": 0.3929624974727631, "loss": 0.4736098051071167, "time": 1.9444246530532836, "epoch": 534, "memory": 36824, "step": 166929}
{"lr": 0.00043788117930538943, "data_time": 0.002670001983642578, "grad_norm": 0.27431467473506926, "loss": 0.47335954308509826, "time": 1.9073207139968873, "epoch": 534, "memory": 36824, "step": 167029}
{"lr": 0.0004369393989620631, "data_time": 0.003461766242980957, "grad_norm": 0.28106362372636795, "loss": 0.47534930109977724, "time": 1.9445226669311524, "epoch": 534, "memory": 36824, "step": 167129}
{"lr": 0.0004358759493229307, "data_time": 0.18159003257751466, "grad_norm": 0.2955913469195366, "loss": 0.474027681350708, "time": 1.9450948715209961, "epoch": 535, "memory": 36824, "step": 167242}
{"lr": 0.0004349355197394547, "data_time": 0.002953028678894043, "grad_norm": 0.2826595589518547, "loss": 0.4786590874195099, "time": 2.027086281776428, "epoch": 535, "memory": 36824, "step": 167342}
{"lr": 0.00043399572688317876, "data_time": 0.0034085512161254883, "grad_norm": 0.30095467120409014, "loss": 0.47812720239162443, "time": 2.4223660469055175, "epoch": 535, "memory": 36824, "step": 167442}
{"lr": 0.00043293452928299146, "data_time": 0.2810868501663208, "grad_norm": 0.2813489958643913, "loss": 0.476585391163826, "time": 2.020255208015442, "epoch": 536, "memory": 36824, "step": 167555}
{"lr": 0.0004319960981182411, "data_time": 0.0027747154235839844, "grad_norm": 0.2758315816521645, "loss": 0.47607489824295046, "time": 1.9818167209625244, "epoch": 536, "memory": 36824, "step": 167655}
{"lr": 0.00043105830880748203, "data_time": 0.0036468744277954102, "grad_norm": 0.28483992218971255, "loss": 0.4761408269405365, "time": 2.0111045122146605, "epoch": 536, "memory": 36824, "step": 167755}
{"lr": 0.0004299993813791793, "data_time": 0.4758938789367676, "grad_norm": 0.271919447183609, "loss": 0.47456025183200834, "time": 1.9804401636123656, "epoch": 537, "memory": 36824, "step": 167868}
{"lr": 0.00042906296466835374, "data_time": 0.0025299549102783202, "grad_norm": 0.27828449159860613, "loss": 0.47388439774513247, "time": 1.9322337627410888, "epoch": 537, "memory": 36824, "step": 167968}
{"lr": 0.0004281271949273433, "data_time": 0.0028600454330444335, "grad_norm": 0.28047580271959305, "loss": 0.47544741332530976, "time": 1.9652219772338868, "epoch": 537, "memory": 36824, "step": 168068}
{"lr": 0.00042707055576507257, "data_time": 0.24716489315032958, "grad_norm": 0.2865805447101593, "loss": 0.47440639734268186, "time": 1.9115853548049926, "epoch": 538, "memory": 36824, "step": 168181}
{"lr": 0.00042613616950894966, "data_time": 0.002452373504638672, "grad_norm": 0.303554105758667, "loss": 0.47681569755077363, "time": 1.9530848741531373, "epoch": 538, "memory": 36824, "step": 168281}
{"lr": 0.0004252024353274113, "data_time": 0.0031847000122070313, "grad_norm": 0.28834892958402636, "loss": 0.4761597514152527, "time": 1.9526543140411377, "epoch": 538, "memory": 36824, "step": 168381}
{"lr": 0.000424148102486221, "data_time": 0.15385916233062744, "grad_norm": 0.29815013110637667, "loss": 0.47298136055469514, "time": 1.977655291557312, "epoch": 539, "memory": 36824, "step": 168494}
{"lr": 0.0004232157626508832, "data_time": 0.0028921127319335937, "grad_norm": 0.29769045412540435, "loss": 0.47317166030406954, "time": 2.262932825088501, "epoch": 539, "memory": 36824, "step": 168594}
{"lr": 0.000422284079983758, "data_time": 0.0029407024383544924, "grad_norm": 0.2908050924539566, "loss": 0.47329733371734617, "time": 1.84139142036438, "epoch": 539, "memory": 36824, "step": 168694}
{"lr": 0.000421232071479287, "data_time": 0.1770857334136963, "grad_norm": 0.31505960822105405, "loss": 0.477279731631279, "time": 1.9017868518829346, "epoch": 540, "memory": 36824, "step": 168807}
{"lr": 0.0004203017939958499, "data_time": 0.0028848886489868165, "grad_norm": 0.2768129646778107, "loss": 0.48064024448394777, "time": 1.9571651220321655, "epoch": 540, "memory": 36824, "step": 168907}
{"lr": 0.0004193721787630235, "data_time": 0.003608441352844238, "grad_norm": 0.30869274735450747, "loss": 0.4779611200094223, "time": 1.94905366897583, "epoch": 540, "memory": 36824, "step": 169007}
{"lr": 0.0004183225125711951, "data_time": 0.28302338123321535, "grad_norm": 0.29801359176635744, "loss": 0.47450661063194277, "time": 1.9141433715820313, "epoch": 541, "memory": 36824, "step": 169120}
{"lr": 0.0004173943133355341, "data_time": 0.00262908935546875, "grad_norm": 0.26383646577596664, "loss": 0.47771045565605164, "time": 1.919266700744629, "epoch": 541, "memory": 36824, "step": 169220}
{"lr": 0.00041646678142156644, "data_time": 0.0035542964935302733, "grad_norm": 0.27532059848308565, "loss": 0.47364482283592224, "time": 1.9244771957397462, "epoch": 541, "memory": 36824, "step": 169320}
{"lr": 0.0004154194754782799, "data_time": 0.3933766603469849, "grad_norm": 0.284802308678627, "loss": 0.4712408870458603, "time": 2.119352197647095, "epoch": 542, "memory": 36824, "step": 169433}
{"lr": 0.00041449337035075846, "data_time": 0.0024948596954345705, "grad_norm": 0.28306309282779696, "loss": 0.47388558089733124, "time": 1.9193322896957397, "epoch": 542, "memory": 36824, "step": 169533}
{"lr": 0.0004135679376046109, "data_time": 0.002975153923034668, "grad_norm": 0.2760331824421883, "loss": 0.473933145403862, "time": 1.9622307538986206, "epoch": 542, "memory": 36824, "step": 169633}
{"lr": 0.0004125230098054342, "data_time": 0.35671942234039306, "grad_norm": 0.2923541009426117, "loss": 0.4778996825218201, "time": 2.372179412841797, "epoch": 543, "memory": 36824, "step": 169746}
{"lr": 0.00041159901461063504, "data_time": 0.0026736736297607424, "grad_norm": 0.2973202720284462, "loss": 0.47764992117881777, "time": 1.8714571714401245, "epoch": 543, "memory": 36824, "step": 169846}
{"lr": 0.00041067569684539963, "data_time": 0.0032230615615844727, "grad_norm": 0.29209437519311904, "loss": 0.4728114366531372, "time": 2.050972270965576, "epoch": 543, "memory": 36824, "step": 169946}
{"lr": 0.0004096331650452665, "data_time": 0.0024715423583984374, "grad_norm": 0.29828244596719744, "loss": 0.47721886038780215, "time": 1.905391764640808, "epoch": 544, "memory": 36824, "step": 170059}
{"lr": 0.00040871129557171687, "data_time": 0.0025441646575927734, "grad_norm": 0.28099333494901657, "loss": 0.4794298857450485, "time": 1.879829216003418, "epoch": 544, "memory": 36824, "step": 170159}
{"lr": 0.00040779010856434604, "data_time": 0.0033020734786987304, "grad_norm": 0.27726870626211164, "loss": 0.4795833557844162, "time": 1.9571755886077882, "epoch": 544, "memory": 36824, "step": 170259}
{"lr": 0.00040674999057724797, "data_time": 0.16894726753234862, "grad_norm": 0.30779664516448973, "loss": 0.4767060190439224, "time": 1.9310702800750732, "epoch": 545, "memory": 36824, "step": 170372}
{"lr": 0.0004058302625771546, "data_time": 0.0025275707244873046, "grad_norm": 0.2970499098300934, "loss": 0.4795049577951431, "time": 1.9314925909042358, "epoch": 545, "memory": 36824, "step": 170472}
{"lr": 0.0004049112220681932, "data_time": 0.003048133850097656, "grad_norm": 0.3092763379216194, "loss": 0.4753663033246994, "time": 2.020088481903076, "epoch": 545, "memory": 36824, "step": 170572}
{"lr": 0.00040387353566687735, "data_time": 0.10156042575836181, "grad_norm": 0.26688182055950166, "loss": 0.4755285054445267, "time": 1.9278846979141235, "epoch": 546, "memory": 36824, "step": 170685}
{"lr": 0.0004029559648558537, "data_time": 0.0025244951248168945, "grad_norm": 0.2685573026537895, "loss": 0.47812171280384064, "time": 1.9093283653259276, "epoch": 546, "memory": 36824, "step": 170785}
{"lr": 0.0004020390865491685, "data_time": 0.00319516658782959, "grad_norm": 0.2663893401622772, "loss": 0.47131815552711487, "time": 2.280933666229248, "epoch": 546, "memory": 36824, "step": 170885}
{"lr": 0.0004010038494648317, "data_time": 0.002648186683654785, "grad_norm": 0.27554464936256406, "loss": 0.4752112925052643, "time": 1.8325278520584107, "epoch": 547, "memory": 36824, "step": 170998}
{"lr": 0.0004000884515216318, "data_time": 0.06595549583435059, "grad_norm": 0.2960213184356689, "loss": 0.4720136016607285, "time": 3.293089246749878, "epoch": 547, "memory": 36824, "step": 171098}
{"lr": 0.00039917375108414356, "data_time": 0.002830243110656738, "grad_norm": 0.29360448122024535, "loss": 0.4718087077140808, "time": 2.56686692237854, "epoch": 547, "memory": 36824, "step": 171198}
{"lr": 0.0003981409810061321, "data_time": 0.23151013851165772, "grad_norm": 0.2896519973874092, "loss": 0.4768611162900925, "time": 1.8331964254379272, "epoch": 548, "memory": 36824, "step": 171311}
{"lr": 0.00039722777157238074, "data_time": 0.0024596691131591798, "grad_norm": 0.27016113251447677, "loss": 0.4774429023265839, "time": 1.9017114162445068, "epoch": 548, "memory": 36824, "step": 171411}
{"lr": 0.00039631526463379524, "data_time": 0.0027534008026123048, "grad_norm": 0.299195796251297, "loss": 0.47529852092266084, "time": 1.8828830480575562, "epoch": 548, "memory": 36824, "step": 171511}
{"lr": 0.0003952849792092999, "data_time": 0.06391689777374268, "grad_norm": 0.28876060247421265, "loss": 0.47834199368953706, "time": 1.8803303480148315, "epoch": 549, "memory": 36824, "step": 171624}
{"lr": 0.000394373973889226, "data_time": 0.002577996253967285, "grad_norm": 0.2883513405919075, "loss": 0.4769615471363068, "time": 1.898914670944214, "epoch": 549, "memory": 36824, "step": 171724}
{"lr": 0.00039346367604177013, "data_time": 0.003519558906555176, "grad_norm": 0.3013029932975769, "loss": 0.47548857033252717, "time": 1.8108235120773315, "epoch": 549, "memory": 36824, "step": 171824}
{"lr": 0.0003924358928755261, "data_time": 0.2484494924545288, "grad_norm": 0.2668301001191139, "loss": 0.47671829760074613, "time": 1.903360867500305, "epoch": 550, "memory": 36824, "step": 171937}
{"lr": 0.0003915271072356977, "data_time": 0.002476930618286133, "grad_norm": 0.29166322350502016, "loss": 0.4721150517463684, "time": 2.1622517108917236, "epoch": 550, "memory": 36824, "step": 172037}
{"lr": 0.0003906190340338492, "data_time": 0.003194236755371094, "grad_norm": 0.2862699419260025, "loss": 0.47301331758499143, "time": 1.8962794542312622, "epoch": 550, "memory": 36824, "step": 172137}
{"lr": 0.0003895937706878349, "data_time": 0.020311141014099122, "grad_norm": 0.3014521777629852, "loss": 0.4742905080318451, "time": 1.8423464059829713, "epoch": 551, "memory": 36824, "step": 172250}
{"lr": 0.0003886872202568914, "data_time": 0.0025412797927856444, "grad_norm": 0.3121239960193634, "loss": 0.4726468980312347, "time": 1.839902138710022, "epoch": 551, "memory": 36824, "step": 172350}
{"lr": 0.0003877813872171162, "data_time": 0.004591631889343262, "grad_norm": 0.30357581526041033, "loss": 0.4804008275270462, "time": 1.9048555135726928, "epoch": 551, "memory": 36824, "step": 172450}
{"lr": 0.00038675866121025355, "data_time": 0.3575555086135864, "grad_norm": 0.315361861884594, "loss": 0.4736102372407913, "time": 1.9289282083511352, "epoch": 552, "memory": 36824, "step": 172563}
{"lr": 0.00038585436147864, "data_time": 0.0025908708572387694, "grad_norm": 0.2829480141401291, "loss": 0.476550766825676, "time": 1.9713054180145264, "epoch": 552, "memory": 36824, "step": 172663}
{"lr": 0.00038495078407912596, "data_time": 0.0034179449081420898, "grad_norm": 0.32345799207687376, "loss": 0.47912994027137756, "time": 1.8954566240310669, "epoch": 552, "memory": 36824, "step": 172763}
{"lr": 0.0003839306128869813, "data_time": 0.3852695941925049, "grad_norm": 0.2987454503774643, "loss": 0.4733740448951721, "time": 1.9343790531158447, "epoch": 553, "memory": 36824, "step": 172876}
{"lr": 0.000383028579306685, "data_time": 0.0025635957717895508, "grad_norm": 0.3174153000116348, "loss": 0.4747701704502106, "time": 1.926016640663147, "epoch": 553, "memory": 36824, "step": 172976}
{"lr": 0.00038212727298707664, "data_time": 0.003048992156982422, "grad_norm": 0.2795857682824135, "loss": 0.4742549657821655, "time": 2.144806909561157, "epoch": 553, "memory": 36824, "step": 173076}
{"lr": 0.00038110967404156, "data_time": 0.0028043746948242187, "grad_norm": 0.30167352557182314, "loss": 0.47787500321865084, "time": 1.941445231437683, "epoch": 554, "memory": 36824, "step": 173189}
{"lr": 0.00038020992202584695, "data_time": 0.0026016950607299803, "grad_norm": 0.28541225492954253, "loss": 0.47628786861896516, "time": 1.9035578966140747, "epoch": 554, "memory": 36824, "step": 173289}
{"lr": 0.0003793109021869819, "data_time": 0.0027058601379394533, "grad_norm": 0.2888037547469139, "loss": 0.4760448068380356, "time": 1.914830732345581, "epoch": 554, "memory": 36824, "step": 173389}
{"lr": 0.00037829589287605197, "data_time": 0.3817894697189331, "grad_norm": 0.31768079102039337, "loss": 0.4745886504650116, "time": 1.9568049669265748, "epoch": 555, "memory": 36824, "step": 173502}
{"lr": 0.00037739843779920057, "data_time": 0.18306620121002198, "grad_norm": 0.27939818799495697, "loss": 0.4778269827365875, "time": 2.019706892967224, "epoch": 555, "memory": 36824, "step": 173602}
{"lr": 0.00037650171980284765, "data_time": 0.0028199672698974608, "grad_norm": 0.2965004101395607, "loss": 0.478664231300354, "time": 1.906264066696167, "epoch": 555, "memory": 36824, "step": 173702}
{"lr": 0.000375489317470213, "data_time": 0.17199404239654542, "grad_norm": 0.3307729005813599, "loss": 0.4718409091234207, "time": 1.9205250024795533, "epoch": 556, "memory": 36824, "step": 173815}
{"lr": 0.0003745941746672545, "data_time": 0.00279538631439209, "grad_norm": 0.29643907994031904, "loss": 0.4725873708724976, "time": 1.8772188186645509, "epoch": 556, "memory": 36824, "step": 173915}
{"lr": 0.0003736997738358504, "data_time": 0.0028096914291381838, "grad_norm": 0.2980308562517166, "loss": 0.47722322642803194, "time": 1.9658504486083985, "epoch": 556, "memory": 36824, "step": 174015}
{"lr": 0.00037268999578067346, "data_time": 0.06558856964111329, "grad_norm": 0.30428526997566224, "loss": 0.4729691296815872, "time": 1.8950915098190309, "epoch": 557, "memory": 36824, "step": 174128}
{"lr": 0.00037179718054712943, "data_time": 0.002731180191040039, "grad_norm": 0.31210300922393797, "loss": 0.47918486297130586, "time": 1.9640520572662354, "epoch": 557, "memory": 36824, "step": 174228}
{"lr": 0.00037090511216351615, "data_time": 0.0034232378005981446, "grad_norm": 0.26304100453853607, "loss": 0.47405165433883667, "time": 1.794101095199585, "epoch": 557, "memory": 36824, "step": 174328}
{"lr": 0.0003698979756401179, "data_time": 0.12318208217620849, "grad_norm": 0.28684408217668533, "loss": 0.4787277549505234, "time": 1.9080368995666503, "epoch": 558, "memory": 36824, "step": 174441}
{"lr": 0.00036900750323173685, "data_time": 0.002406811714172363, "grad_norm": 0.3200157850980759, "loss": 0.475348761677742, "time": 1.9378250598907472, "epoch": 558, "memory": 36824, "step": 174541}
{"lr": 0.0003681177825389022, "data_time": 0.003041863441467285, "grad_norm": 0.31074552834033964, "loss": 0.4758454948663712, "time": 1.8292719841003418, "epoch": 558, "memory": 36824, "step": 174641}
{"lr": 0.00036711330475646737, "data_time": 0.27402782440185547, "grad_norm": 0.28101556152105334, "loss": 0.4750701755285263, "time": 1.862202525138855, "epoch": 559, "memory": 36824, "step": 174754}
{"lr": 0.00036622519038896547, "data_time": 0.0024442434310913085, "grad_norm": 0.32060140669345855, "loss": 0.47782175838947294, "time": 1.9134102582931518, "epoch": 559, "memory": 36824, "step": 174854}
{"lr": 0.00036533783258978106, "data_time": 0.003030133247375488, "grad_norm": 0.3184348836541176, "loss": 0.47762106359004974, "time": 2.1759499311447144, "epoch": 559, "memory": 36824, "step": 174954}
{"lr": 0.000364336030712063, "data_time": 0.08255085945129395, "grad_norm": 0.2833505541086197, "loss": 0.47476630508899687, "time": 1.9056951999664307, "epoch": 560, "memory": 36824, "step": 175067}
{"lr": 0.00036345028956086506, "data_time": 0.0025592327117919924, "grad_norm": 0.28720563352108003, "loss": 0.4768215507268906, "time": 1.878887677192688, "epoch": 560, "memory": 36824, "step": 175167}
{"lr": 0.0003625653098178269, "data_time": 0.004001617431640625, "grad_norm": 0.30335749983787536, "loss": 0.4740340501070023, "time": 1.8415982961654662, "epoch": 560, "memory": 36824, "step": 175267}
{"lr": 0.00036156620096285553, "data_time": 0.002599501609802246, "grad_norm": 0.2829828873276711, "loss": 0.47749132215976714, "time": 1.872734832763672, "epoch": 561, "memory": 36824, "step": 175380}
{"lr": 0.0003606828481628337, "data_time": 0.0024297237396240234, "grad_norm": 0.3298044502735138, "loss": 0.47790123522281647, "time": 1.9374347925186157, "epoch": 561, "memory": 36824, "step": 175480}
{"lr": 0.0003598002615978047, "data_time": 0.0028947830200195313, "grad_norm": 0.2854736685752869, "loss": 0.4711600184440613, "time": 2.2105284214019774, "epoch": 561, "memory": 36824, "step": 175580}
{"lr": 0.0003588038628375937, "data_time": 0.4399502038955688, "grad_norm": 0.3046130910515785, "loss": 0.4754908293485641, "time": 1.8713623523712157, "epoch": 562, "memory": 36824, "step": 175693}
{"lr": 0.00035792291348280973, "data_time": 0.0027073144912719725, "grad_norm": 0.30853894650936126, "loss": 0.47747316062450407, "time": 1.899024534225464, "epoch": 562, "memory": 36824, "step": 175793}
{"lr": 0.00035704273517675873, "data_time": 0.0029853343963623046, "grad_norm": 0.3092548757791519, "loss": 0.4755817800760269, "time": 1.8901928901672362, "epoch": 562, "memory": 36824, "step": 175893}
{"lr": 0.0003560490635370147, "data_time": 0.2470013380050659, "grad_norm": 0.30064006298780444, "loss": 0.47775780260562895, "time": 1.811233687400818, "epoch": 563, "memory": 36824, "step": 176006}
{"lr": 0.0003551705326804625, "data_time": 0.0032042503356933595, "grad_norm": 0.287808333337307, "loss": 0.47708740234375, "time": 1.9137047052383422, "epoch": 563, "memory": 36824, "step": 176106}
{"lr": 0.00035429277767320863, "data_time": 0.002887535095214844, "grad_norm": 0.29694330841302874, "loss": 0.47809491455554964, "time": 1.8500285387039184, "epoch": 563, "memory": 36824, "step": 176206}
{"lr": 0.00035330185013303714, "data_time": 0.16155638694763183, "grad_norm": 0.2721375569701195, "loss": 0.4744713842868805, "time": 1.9715344667434693, "epoch": 564, "memory": 36824, "step": 176319}
{"lr": 0.0003524257527863858, "data_time": 0.0029876708984375, "grad_norm": 0.31784220337867736, "loss": 0.4734184116125107, "time": 1.8934635877609254, "epoch": 564, "memory": 36824, "step": 176419}
{"lr": 0.00035155043607634003, "data_time": 0.0037302255630493166, "grad_norm": 0.3191536575555801, "loss": 0.4767117589712143, "time": 1.8481926202774048, "epoch": 564, "memory": 36824, "step": 176519}
{"lr": 0.000350562269567959, "data_time": 0.14004004001617432, "grad_norm": 0.2810735061764717, "loss": 0.48078409731388094, "time": 1.8596704244613647, "epoch": 565, "memory": 36824, "step": 176632}
{"lr": 0.0003496886207012939, "data_time": 0.002876591682434082, "grad_norm": 0.29093275368213656, "loss": 0.4758821874856949, "time": 2.0307822465896606, "epoch": 565, "memory": 36824, "step": 176732}
{"lr": 0.0003488157572452036, "data_time": 0.0035956621170043944, "grad_norm": 0.2969455599784851, "loss": 0.4762825548648834, "time": 1.8748966455459595, "epoch": 565, "memory": 36824, "step": 176832}
{"lr": 0.0003478303686536526, "data_time": 0.4307488679885864, "grad_norm": 0.2848468691110611, "loss": 0.4751704066991806, "time": 1.8236518621444702, "epoch": 566, "memory": 36824, "step": 176945}
{"lr": 0.000346959183195223, "data_time": 0.032195663452148436, "grad_norm": 0.29160783886909486, "loss": 0.47624422013759615, "time": 1.8110680103302002, "epoch": 566, "memory": 36824, "step": 177045}
{"lr": 0.0003460887879079167, "data_time": 0.002856159210205078, "grad_norm": 0.30555116534233095, "loss": 0.4787142425775528, "time": 1.747782039642334, "epoch": 566, "memory": 36824, "step": 177145}
{"lr": 0.00034510619407076773, "data_time": 0.30193395614624025, "grad_norm": 0.3056704834103584, "loss": 0.47320670187473296, "time": 1.820689058303833, "epoch": 567, "memory": 36824, "step": 177258}
{"lr": 0.00034423748690672924, "data_time": 0.0024024486541748048, "grad_norm": 0.3017500787973404, "loss": 0.47527694404125215, "time": 2.0061342477798463, "epoch": 567, "memory": 36824, "step": 177358}
{"lr": 0.00034336957466086067, "data_time": 0.0038463830947875976, "grad_norm": 0.28906037509441374, "loss": 0.4789039134979248, "time": 1.7921908378601075, "epoch": 567, "memory": 36824, "step": 177458}
{"lr": 0.00034238979236793143, "data_time": 0.19722166061401367, "grad_norm": 0.2988493740558624, "loss": 0.4782023698091507, "time": 1.8570617198944093, "epoch": 568, "memory": 36824, "step": 177571}
{"lr": 0.00034152357834209344, "data_time": 0.002599644660949707, "grad_norm": 0.32033962160348894, "loss": 0.4762566477060318, "time": 1.840844202041626, "epoch": 568, "memory": 36824, "step": 177671}
{"lr": 0.0003406581639678881, "data_time": 0.003038477897644043, "grad_norm": 0.284700508415699, "loss": 0.4764579892158508, "time": 1.8340314149856567, "epoch": 568, "memory": 36824, "step": 177771}
{"lr": 0.00033968120996095475, "data_time": 0.2606779098510742, "grad_norm": 0.31248058676719664, "loss": 0.4695393472909927, "time": 1.9202452659606934, "epoch": 569, "memory": 36824, "step": 177884}
{"lr": 0.000338817503874524, "data_time": 0.0025182247161865236, "grad_norm": 0.29729338437318803, "loss": 0.47286731004714966, "time": 1.8581979036331178, "epoch": 569, "memory": 36824, "step": 177984}
{"lr": 0.0003379546021595258, "data_time": 0.003473997116088867, "grad_norm": 0.2933802559971809, "loss": 0.4782188445329666, "time": 1.7873104095458985, "epoch": 569, "memory": 36824, "step": 178084}
{"lr": 0.00033698049313203625, "data_time": 0.515709662437439, "grad_norm": 0.2886302277445793, "loss": 0.47555056512355803, "time": 1.795027256011963, "epoch": 570, "memory": 36824, "step": 178197}
{"lr": 0.00033611930974336703, "data_time": 0.0025981903076171876, "grad_norm": 0.296350459754467, "loss": 0.475667679309845, "time": 1.8439364910125733, "epoch": 570, "memory": 36824, "step": 178297}
{"lr": 0.0003352589354321861, "data_time": 0.0027839183807373048, "grad_norm": 0.32920039594173434, "loss": 0.4766042321920395, "time": 1.810349202156067, "epoch": 570, "memory": 36824, "step": 178397}
{"lr": 0.0003342876880289764, "data_time": 0.24263129234313965, "grad_norm": 0.29902747422456744, "loss": 0.4815668135881424, "time": 1.8322132110595704, "epoch": 571, "memory": 36824, "step": 178510}
{"lr": 0.0003334290420533169, "data_time": 0.002639031410217285, "grad_norm": 0.3270377337932587, "loss": 0.4771753251552582, "time": 1.8266477823257445, "epoch": 571, "memory": 36824, "step": 178610}
{"lr": 0.0003325712098473757, "data_time": 0.002945566177368164, "grad_norm": 0.34224569201469424, "loss": 0.47243468165397645, "time": 1.8701061487197876, "epoch": 571, "memory": 36824, "step": 178710}
{"lr": 0.0003316028406643838, "data_time": 0.3159895181655884, "grad_norm": 0.28493667095899583, "loss": 0.47119749784469606, "time": 1.852152442932129, "epoch": 572, "memory": 36824, "step": 178823}
{"lr": 0.0003307467467736248, "data_time": 0.002525138854980469, "grad_norm": 0.29392545819282534, "loss": 0.473789244890213, "time": 1.8266319274902343, "epoch": 572, "memory": 36824, "step": 178923}
{"lr": 0.0003298914713309092, "data_time": 0.0029347896575927734, "grad_norm": 0.29707077890634537, "loss": 0.4723765730857849, "time": 1.7825214385986328, "epoch": 572, "memory": 36824, "step": 179023}
{"lr": 0.0003289259969148938, "data_time": 0.26739342212677003, "grad_norm": 0.29839442372322084, "loss": 0.47528516948223115, "time": 1.8669886350631715, "epoch": 573, "memory": 36824, "step": 179136}
{"lr": 0.00032807246973731724, "data_time": 0.002440786361694336, "grad_norm": 0.3089224427938461, "loss": 0.4724069356918335, "time": 1.8750295639038086, "epoch": 573, "memory": 36824, "step": 179236}
{"lr": 0.00032721976567212424, "data_time": 0.002832317352294922, "grad_norm": 0.3134785532951355, "loss": 0.4739630252122879, "time": 1.9156370639801026, "epoch": 573, "memory": 36824, "step": 179336}
{"lr": 0.0003262572025203779, "data_time": 0.20419526100158691, "grad_norm": 0.2808703288435936, "loss": 0.47368588447570803, "time": 1.8845566034317016, "epoch": 574, "memory": 36824, "step": 179449}
{"lr": 0.0003254062566404088, "data_time": 0.0029889345169067383, "grad_norm": 0.31990667283535, "loss": 0.4738878279924393, "time": 1.8906834125518799, "epoch": 574, "memory": 36824, "step": 179549}
{"lr": 0.0003245561385230983, "data_time": 0.0034313440322875977, "grad_norm": 0.32286964952945707, "loss": 0.472382590174675, "time": 1.9022082090377808, "epoch": 574, "memory": 36824, "step": 179649}
{"lr": 0.0003235965030831701, "data_time": 0.46581807136535647, "grad_norm": 0.29891141057014464, "loss": 0.47633372247219086, "time": 2.125790810585022, "epoch": 575, "memory": 36824, "step": 179762}
{"lr": 0.0003227481530411253, "data_time": 0.0026783466339111326, "grad_norm": 0.2835345149040222, "loss": 0.47636803090572355, "time": 1.9509448289871216, "epoch": 575, "memory": 36824, "step": 179862}
{"lr": 0.0003219006353978707, "data_time": 0.0028354883193969726, "grad_norm": 0.3079924494028091, "loss": 0.4749114751815796, "time": 1.887129807472229, "epoch": 575, "memory": 36824, "step": 179962}
{"lr": 0.00032094394406728254, "data_time": 0.09025404453277588, "grad_norm": 0.28176419883966447, "loss": 0.47466872334480287, "time": 1.8699352741241455, "epoch": 576, "memory": 36824, "step": 180075}
{"lr": 0.0003200982043591239, "data_time": 0.0025463104248046875, "grad_norm": 0.3273096889257431, "loss": 0.4749025523662567, "time": 1.8070024728775025, "epoch": 576, "memory": 36824, "step": 180175}
{"lr": 0.00031925330167166364, "data_time": 0.0029749631881713866, "grad_norm": 0.35016782879829406, "loss": 0.4745357662439346, "time": 1.8610684633255006, "epoch": 576, "memory": 36824, "step": 180275}
{"lr": 0.00031829957079763076, "data_time": 0.05078282356262207, "grad_norm": 0.33742217123508456, "loss": 0.4780629098415375, "time": 1.8082670211791991, "epoch": 577, "memory": 36824, "step": 180388}
{"lr": 0.000317456455874716, "data_time": 0.0026829004287719726, "grad_norm": 0.30272565633058546, "loss": 0.4753147065639496, "time": 1.9151196241378785, "epoch": 577, "memory": 36824, "step": 180488}
{"lr": 0.00031661418258010566, "data_time": 0.003026723861694336, "grad_norm": 0.3308349996805191, "loss": 0.47445154190063477, "time": 1.8978360891342163, "epoch": 577, "memory": 36824, "step": 180588}
{"lr": 0.00031566342845925705, "data_time": 0.0027124643325805663, "grad_norm": 0.2851553201675415, "loss": 0.4743891716003418, "time": 1.8700248718261718, "epoch": 578, "memory": 36824, "step": 180701}
{"lr": 0.0003148229527280944, "data_time": 0.002482891082763672, "grad_norm": 0.3066308796405792, "loss": 0.4742645889520645, "time": 1.8747976779937745, "epoch": 578, "memory": 36824, "step": 180801}
{"lr": 0.00031398332321846, "data_time": 0.0036889076232910155, "grad_norm": 0.2960257321596146, "loss": 0.4721024990081787, "time": 1.8903092861175537, "epoch": 578, "memory": 36824, "step": 180901}
{"lr": 0.00031303556209656024, "data_time": 0.4023797035217285, "grad_norm": 0.3130301460623741, "loss": 0.471848064661026, "time": 1.8635557889938354, "epoch": 579, "memory": 36824, "step": 181014}
{"lr": 0.00031219773991856116, "data_time": 0.002734804153442383, "grad_norm": 0.30159473419189453, "loss": 0.4764850795269012, "time": 1.8101086139678955, "epoch": 579, "memory": 36824, "step": 181114}
{"lr": 0.00031136076854085444, "data_time": 0.0030339956283569336, "grad_norm": 0.28726907521486283, "loss": 0.47357842028141023, "time": 1.8446716785430908, "epoch": 579, "memory": 36824, "step": 181214}
{"lr": 0.0003104160166125272, "data_time": 0.027281427383422853, "grad_norm": 0.29285117238759995, "loss": 0.4762634813785553, "time": 1.8097118377685546, "epoch": 580, "memory": 36824, "step": 181327}
{"lr": 0.0003095808623037604, "data_time": 0.0026076316833496095, "grad_norm": 0.29417153894901277, "loss": 0.4711811304092407, "time": 1.7992803573608398, "epoch": 580, "memory": 36824, "step": 181427}
{"lr": 0.00030874656335951473, "data_time": 0.0035650014877319338, "grad_norm": 0.3102514028549194, "loss": 0.47542455792427063, "time": 1.9143587350845337, "epoch": 580, "memory": 36824, "step": 181527}
{"lr": 0.000307804836767963, "data_time": 0.26944694519042967, "grad_norm": 0.29494078308343885, "loss": 0.47895432710647584, "time": 1.9081735372543336, "epoch": 581, "memory": 36824, "step": 181640}
{"lr": 0.00030697236459891207, "data_time": 0.0027088642120361326, "grad_norm": 0.30059521794319155, "loss": 0.47656553983688354, "time": 1.7942286491394044, "epoch": 581, "memory": 36824, "step": 181740}
{"lr": 0.0003061407523439921, "data_time": 0.003968238830566406, "grad_norm": 0.2781252399086952, "loss": 0.4712205410003662, "time": 1.835785460472107, "epoch": 581, "memory": 36824, "step": 181840}
{"lr": 0.00030520206718072504, "data_time": 0.002665805816650391, "grad_norm": 0.2884072601795197, "loss": 0.4710076302289963, "time": 1.8754480361938477, "epoch": 582, "memory": 36824, "step": 181953}
{"lr": 0.0003043722913760422, "data_time": 0.0026056051254272463, "grad_norm": 0.31526939868927, "loss": 0.4755634695291519, "time": 1.925994348526001, "epoch": 582, "memory": 36824, "step": 182053}
{"lr": 0.0003035433800204083, "data_time": 0.10054783821105957, "grad_norm": 0.29393370300531385, "loss": 0.4751526743173599, "time": 1.8886019706726074, "epoch": 582, "memory": 36824, "step": 182153}
{"lr": 0.0003026077523249654, "data_time": 0.004444575309753418, "grad_norm": 0.3415354430675507, "loss": 0.4734936267137527, "time": 1.8122379302978515, "epoch": 583, "memory": 36824, "step": 182266}
{"lr": 0.00030178068706322936, "data_time": 0.0026466846466064453, "grad_norm": 0.29935030490159986, "loss": 0.47749372720718386, "time": 1.8602335453033447, "epoch": 583, "memory": 36824, "step": 182366}
{"lr": 0.0003009544907706885, "data_time": 0.003710341453552246, "grad_norm": 0.2928850531578064, "loss": 0.4747608363628387, "time": 1.857705569267273, "epoch": 583, "memory": 36824, "step": 182466}
{"lr": 0.00030002193653036555, "data_time": 0.0025153160095214844, "grad_norm": 0.29898093044757845, "loss": 0.47277838587760923, "time": 1.8123596429824829, "epoch": 584, "memory": 36824, "step": 182579}
{"lr": 0.00029919759594384003, "data_time": 0.0025611162185668946, "grad_norm": 0.2990129470825195, "loss": 0.4725282460451126, "time": 1.82357759475708, "epoch": 584, "memory": 36824, "step": 182679}
{"lr": 0.000298374128831808, "data_time": 0.0033314704895019533, "grad_norm": 0.3087871104478836, "loss": 0.47164960503578185, "time": 1.7851374387741088, "epoch": 584, "memory": 36824, "step": 182779}
{"lr": 0.0002974446639813838, "data_time": 0.5357450485229492, "grad_norm": 0.2844745874404907, "loss": 0.47408901154994965, "time": 1.8647143125534058, "epoch": 585, "memory": 36824, "step": 182892}
{"lr": 0.00029662306215577535, "data_time": 0.09786853790283204, "grad_norm": 0.3397137403488159, "loss": 0.47590217292308806, "time": 1.8924139499664308, "epoch": 585, "memory": 36824, "step": 182992}
{"lr": 0.00029580233829503306, "data_time": 0.003065204620361328, "grad_norm": 0.28971735686063765, "loss": 0.47432767152786254, "time": 1.8409920454025268, "epoch": 585, "memory": 36824, "step": 183092}
{"lr": 0.0002948759787164981, "data_time": 0.09721150398254394, "grad_norm": 0.3152433156967163, "loss": 0.4736765384674072, "time": 1.8487207651138307, "epoch": 586, "memory": 36824, "step": 183205}
{"lr": 0.00029405712969071435, "data_time": 0.002753758430480957, "grad_norm": 0.307243275642395, "loss": 0.4775730162858963, "time": 1.8802298784255982, "epoch": 586, "memory": 36824, "step": 183305}
{"lr": 0.00029323916310516836, "data_time": 0.003257417678833008, "grad_norm": 0.33281399607658385, "loss": 0.475247910618782, "time": 1.8747421503067017, "epoch": 586, "memory": 36824, "step": 183405}
{"lr": 0.0002923159246274517, "data_time": 0.07520391941070556, "grad_norm": 0.2860890358686447, "loss": 0.47006859481334684, "time": 1.8785593032836914, "epoch": 587, "memory": 36824, "step": 183518}
{"lr": 0.0002914998423933635, "data_time": 0.0026901721954345702, "grad_norm": 0.30316841751337054, "loss": 0.4756841570138931, "time": 1.8694353818893432, "epoch": 587, "memory": 36824, "step": 183618}
{"lr": 0.0002906846470598056, "data_time": 0.002927875518798828, "grad_norm": 0.2845732897520065, "loss": 0.4772707402706146, "time": 1.840494704246521, "epoch": 587, "memory": 36824, "step": 183718}
{"lr": 0.0002897645454585049, "data_time": 0.1594334363937378, "grad_norm": 0.3007627993822098, "loss": 0.4739915281534195, "time": 1.8383538246154785, "epoch": 588, "memory": 36824, "step": 183831}
{"lr": 0.00028895124396070636, "data_time": 0.0025954484939575196, "grad_norm": 0.3065718948841095, "loss": 0.47031536400318147, "time": 1.8235094785690307, "epoch": 588, "memory": 36824, "step": 183931}
{"lr": 0.0002881388338085758, "data_time": 0.0028676509857177733, "grad_norm": 0.3138791501522064, "loss": 0.4770618945360184, "time": 2.2142953872680664, "epoch": 588, "memory": 36824, "step": 184031}
{"lr": 0.00028722188480568916, "data_time": 0.040552830696105956, "grad_norm": 0.30696752965450286, "loss": 0.4719316929578781, "time": 1.8841192722320557, "epoch": 589, "memory": 36824, "step": 184144}
{"lr": 0.00028641137794125856, "data_time": 0.002718496322631836, "grad_norm": 0.28971784114837645, "loss": 0.4775078952312469, "time": 1.8339307308197021, "epoch": 589, "memory": 36824, "step": 184244}
{"lr": 0.000285601766852403, "data_time": 0.0034370183944702148, "grad_norm": 0.30085892528295516, "loss": 0.4750241994857788, "time": 2.0466399669647215, "epoch": 589, "memory": 36824, "step": 184344}
{"lr": 0.0002846879861160591, "data_time": 0.16143951416015626, "grad_norm": 0.3006537646055222, "loss": 0.47590462267398836, "time": 1.7989174127578735, "epoch": 590, "memory": 36824, "step": 184457}
{"lr": 0.0002838802877343224, "data_time": 0.0038802385330200194, "grad_norm": 0.29289563894271853, "loss": 0.4747682958841324, "time": 1.8056663036346436, "epoch": 590, "memory": 36824, "step": 184557}
{"lr": 0.00028307348954276186, "data_time": 0.003227567672729492, "grad_norm": 0.30464592278003694, "loss": 0.4798448085784912, "time": 2.3870207548141478, "epoch": 590, "memory": 36824, "step": 184657}
{"lr": 0.0002821628926869519, "data_time": 0.13686304092407225, "grad_norm": 0.28802177011966706, "loss": 0.4750978142023087, "time": 1.820388126373291, "epoch": 591, "memory": 36824, "step": 184770}
{"lr": 0.000281358016589246, "data_time": 0.0026371955871582033, "grad_norm": 0.29161920994520185, "loss": 0.4744908481836319, "time": 1.8189449071884156, "epoch": 591, "memory": 36824, "step": 184870}
{"lr": 0.0002805540450809356, "data_time": 0.003045177459716797, "grad_norm": 0.3169620081782341, "loss": 0.4673062711954117, "time": 1.9983373165130616, "epoch": 591, "memory": 36824, "step": 184970}
{"lr": 0.0002796466476652483, "data_time": 0.0026121616363525392, "grad_norm": 0.317911759018898, "loss": 0.4757393360137939, "time": 1.8203862667083741, "epoch": 592, "memory": 36824, "step": 185083}
{"lr": 0.00027884460760468465, "data_time": 0.0026397943496704102, "grad_norm": 0.3040279895067215, "loss": 0.47532725930213926, "time": 1.7683025598526, "epoch": 592, "memory": 36824, "step": 185183}
{"lr": 0.00027804347651727844, "data_time": 0.003845810890197754, "grad_norm": 0.31046708971261977, "loss": 0.4748360216617584, "time": 2.1109762668609617, "epoch": 592, "memory": 36824, "step": 185283}
{"lr": 0.00027713929404663126, "data_time": 0.1975860595703125, "grad_norm": 0.3058715522289276, "loss": 0.47470404505729674, "time": 1.811009454727173, "epoch": 593, "memory": 36824, "step": 185396}
{"lr": 0.0002763401037278626, "data_time": 0.0025292634963989258, "grad_norm": 0.30499406158924103, "loss": 0.4750923693180084, "time": 1.847918653488159, "epoch": 593, "memory": 36824, "step": 185496}
{"lr": 0.0002755418267504814, "data_time": 0.003444528579711914, "grad_norm": 0.3277400881052017, "loss": 0.4692468285560608, "time": 1.7858790397644042, "epoch": 593, "memory": 36824, "step": 185596}
{"lr": 0.0002746408746748583, "data_time": 0.1440753936767578, "grad_norm": 0.2986285239458084, "loss": 0.47297255992889403, "time": 1.8013410329818726, "epoch": 594, "memory": 36824, "step": 185709}
{"lr": 0.00027384454775384123, "data_time": 0.06872055530548096, "grad_norm": 0.33843027651309965, "loss": 0.47466102838516233, "time": 1.8587422132492066, "epoch": 594, "memory": 36824, "step": 185809}
{"lr": 0.0002730491385268361, "data_time": 0.0029862165451049806, "grad_norm": 0.30429916381835936, "loss": 0.47089880108833315, "time": 2.1354565382003785, "epoch": 594, "memory": 36824, "step": 185909}
{"lr": 0.000272151432241022, "data_time": 0.0026232004165649414, "grad_norm": 0.31364981532096864, "loss": 0.4763627201318741, "time": 1.827134919166565, "epoch": 595, "memory": 36824, "step": 186022}
{"lr": 0.00027135798232478663, "data_time": 0.002551770210266113, "grad_norm": 0.3127815186977386, "loss": 0.4745788246393204, "time": 1.808153009414673, "epoch": 595, "memory": 36824, "step": 186122}
{"lr": 0.0002705654544395057, "data_time": 0.0027513027191162108, "grad_norm": 0.29726783335208895, "loss": 0.4761995136737823, "time": 1.8661665201187134, "epoch": 595, "memory": 36824, "step": 186222}
{"lr": 0.00026967100928282427, "data_time": 0.002580595016479492, "grad_norm": 0.2872627034783363, "loss": 0.4726324528455734, "time": 1.8288217782974243, "epoch": 596, "memory": 36824, "step": 186335}
{"lr": 0.00026888044992924023, "data_time": 0.0026043176651000975, "grad_norm": 0.3017140805721283, "loss": 0.4740166634321213, "time": 1.849203848838806, "epoch": 596, "memory": 36824, "step": 186435}
{"lr": 0.0002680908169277993, "data_time": 0.002723288536071777, "grad_norm": 0.31561010479927065, "loss": 0.47341888546943667, "time": 1.8216381072998047, "epoch": 596, "memory": 36824, "step": 186535}
{"lr": 0.00026719964818385053, "data_time": 0.003367447853088379, "grad_norm": 0.2908525109291077, "loss": 0.4759059548377991, "time": 1.8015724420547485, "epoch": 597, "memory": 36824, "step": 186648}
{"lr": 0.00026641199290139655, "data_time": 0.0028998851776123047, "grad_norm": 0.3064345896244049, "loss": 0.47189112901687624, "time": 1.7413172245025634, "epoch": 597, "memory": 36824, "step": 186748}
{"lr": 0.00026562526827644416, "data_time": 0.0031114816665649414, "grad_norm": 0.2874131664633751, "loss": 0.47680425047874453, "time": 1.7912058115005494, "epoch": 597, "memory": 36824, "step": 186848}
{"lr": 0.00026473739117284306, "data_time": 0.39231297969818113, "grad_norm": 0.3390779823064804, "loss": 0.47342115044593813, "time": 1.8890309810638428, "epoch": 598, "memory": 36824, "step": 186961}
{"lr": 0.0002639526534203747, "data_time": 0.002584362030029297, "grad_norm": 0.3371055394411087, "loss": 0.47416660785675047, "time": 1.8047608375549316, "epoch": 598, "memory": 36824, "step": 187061}
{"lr": 0.0002631688506148643, "data_time": 0.002918529510498047, "grad_norm": 0.34820989668369295, "loss": 0.47333927154541017, "time": 1.7934760093688964, "epoch": 598, "memory": 36824, "step": 187161}
{"lr": 0.00026228428032298065, "data_time": 0.00312657356262207, "grad_norm": 0.30373522639274597, "loss": 0.47161601185798646, "time": 1.8378763437271117, "epoch": 599, "memory": 36824, "step": 187274}
{"lr": 0.0002615024735095004, "data_time": 0.002615833282470703, "grad_norm": 0.31730034947395325, "loss": 0.47345520853996276, "time": 1.7862205743789672, "epoch": 599, "memory": 36824, "step": 187374}
{"lr": 0.00026072160591646, "data_time": 0.002906036376953125, "grad_norm": 0.27403854578733444, "loss": 0.4738801419734955, "time": 1.8600339889526367, "epoch": 599, "memory": 36824, "step": 187474}
{"lr": 0.0002598403575511598, "data_time": 0.31358091831207274, "grad_norm": 0.3108857676386833, "loss": 0.4744037002325058, "time": 1.7976243019104003, "epoch": 600, "memory": 36824, "step": 187587}
{"lr": 0.00025906149503558874, "data_time": 0.0027701854705810547, "grad_norm": 0.2993065044283867, "loss": 0.47133628726005555, "time": 1.843290114402771, "epoch": 600, "memory": 36824, "step": 187687}
{"lr": 0.00025828357599789044, "data_time": 0.0034922122955322265, "grad_norm": 0.2906675457954407, "loss": 0.4762153089046478, "time": 1.7498918056488038, "epoch": 600, "memory": 36824, "step": 187787}
{"lr": 0.0002574056646172763, "data_time": 0.002647876739501953, "grad_norm": 0.30486688017845154, "loss": 0.4687503844499588, "time": 1.801039218902588, "epoch": 601, "memory": 36824, "step": 187900}
{"lr": 0.00025662975970822685, "data_time": 0.002622056007385254, "grad_norm": 0.33850416243076326, "loss": 0.4768304765224457, "time": 1.9528821468353272, "epoch": 601, "memory": 36824, "step": 188000}
{"lr": 0.00025585480251836125, "data_time": 0.0028042316436767576, "grad_norm": 0.30878054201602934, "loss": 0.47174136638641356, "time": 1.8055588960647584, "epoch": 601, "memory": 36824, "step": 188100}
{"lr": 0.0002549802431235168, "data_time": 0.0026165008544921874, "grad_norm": 0.2960720077157021, "loss": 0.47461605072021484, "time": 1.8261486768722535, "epoch": 602, "memory": 36824, "step": 188213}
{"lr": 0.00025420730907906275, "data_time": 0.0023870944976806642, "grad_norm": 0.2887006223201752, "loss": 0.47625238001346587, "time": 1.8561990022659303, "epoch": 602, "memory": 36824, "step": 188313}
{"lr": 0.00025343532697890943, "data_time": 0.00292820930480957, "grad_norm": 0.29093828797340393, "loss": 0.4733910858631134, "time": 1.8753883123397828, "epoch": 602, "memory": 36824, "step": 188413}
{"lr": 0.0002525641345136419, "data_time": 0.002897834777832031, "grad_norm": 0.32203992605209353, "loss": 0.4739393085241318, "time": 1.8444545269012451, "epoch": 603, "memory": 36824, "step": 188526}
{"lr": 0.00025179418454109357, "data_time": 0.0028020858764648436, "grad_norm": 0.3078485667705536, "loss": 0.47512392699718475, "time": 1.7792094945907593, "epoch": 603, "memory": 36824, "step": 188626}
{"lr": 0.0002510251907216964, "data_time": 0.003436732292175293, "grad_norm": 0.29053598642349243, "loss": 0.47321660816669464, "time": 1.8514136791229248, "epoch": 603, "memory": 36824, "step": 188726}
{"lr": 0.00025015738007228114, "data_time": 0.27801036834716797, "grad_norm": 0.2960879594087601, "loss": 0.4724707007408142, "time": 1.839032030105591, "epoch": 604, "memory": 36824, "step": 188839}
{"lr": 0.000249390427327958, "data_time": 0.0027389287948608398, "grad_norm": 0.303201887011528, "loss": 0.4735058069229126, "time": 1.8674566507339478, "epoch": 604, "memory": 36824, "step": 188939}
{"lr": 0.0002486244349293, "data_time": 0.003059053421020508, "grad_norm": 0.31016720831394196, "loss": 0.4737697273492813, "time": 1.893990969657898, "epoch": 604, "memory": 36824, "step": 189039}
{"lr": 0.0002477600209242263, "data_time": 0.43523669242858887, "grad_norm": 0.3365249037742615, "loss": 0.472370782494545, "time": 1.827294111251831, "epoch": 605, "memory": 36824, "step": 189152}
{"lr": 0.0002469960785132358, "data_time": 0.003018021583557129, "grad_norm": 0.2986562341451645, "loss": 0.4710303574800491, "time": 1.8272321224212646, "epoch": 605, "memory": 36824, "step": 189252}
{"lr": 0.0002462331006240136, "data_time": 0.0030349016189575194, "grad_norm": 0.30459359288215637, "loss": 0.4708437114953995, "time": 1.9890379905700684, "epoch": 605, "memory": 36824, "step": 189352}
{"lr": 0.00024537209803373184, "data_time": 0.012629151344299316, "grad_norm": 0.3033536344766617, "loss": 0.47355727255344393, "time": 1.8318312883377075, "epoch": 606, "memory": 36824, "step": 189465}
{"lr": 0.0002446111790097422, "data_time": 0.0023314952850341797, "grad_norm": 0.312741781771183, "loss": 0.4724636167287827, "time": 1.934510588645935, "epoch": 606, "memory": 36824, "step": 189565}
{"lr": 0.0002438512286671429, "data_time": 0.003008437156677246, "grad_norm": 0.3136918589472771, "loss": 0.4757888734340668, "time": 1.8768419742584228, "epoch": 606, "memory": 36824, "step": 189665}
{"lr": 0.00024299365220381225, "data_time": 0.0026641130447387696, "grad_norm": 0.3367306411266327, "loss": 0.47070442140102386, "time": 1.8106754541397094, "epoch": 607, "memory": 36824, "step": 189778}
{"lr": 0.00024223576956882976, "data_time": 0.002817106246948242, "grad_norm": 0.2959106147289276, "loss": 0.47482392489910125, "time": 1.8018332958221435, "epoch": 607, "memory": 36824, "step": 189878}
{"lr": 0.00024147885975830767, "data_time": 0.0034006834030151367, "grad_norm": 0.305291011929512, "loss": 0.47862129509449003, "time": 1.8342730283737183, "epoch": 607, "memory": 36824, "step": 189978}
{"lr": 0.00024062472407554352, "data_time": 0.009122514724731445, "grad_norm": 0.2909006536006927, "loss": 0.47515781223773956, "time": 1.848618245124817, "epoch": 608, "memory": 36824, "step": 190091}
{"lr": 0.0002398698907796914, "data_time": 0.0025711774826049803, "grad_norm": Infinity, "loss": 0.4715289771556854, "time": 2.059454607963562, "epoch": 608, "memory": 36824, "step": 190191}
{"lr": 0.00023911603443474734, "data_time": 0.0030821800231933595, "grad_norm": 0.3142255812883377, "loss": 0.4714545994997025, "time": 1.9243004322052002, "epoch": 608, "memory": 36824, "step": 190291}
{"lr": 0.00023826535412737197, "data_time": 0.041434502601623534, "grad_norm": 0.2944828733801842, "loss": 0.47476964592933657, "time": 1.7632230520248413, "epoch": 609, "memory": 36824, "step": 190404}
{"lr": 0.00023751358306866823, "data_time": 0.002606654167175293, "grad_norm": 0.2912913948297501, "loss": 0.4749330013990402, "time": 1.8166226625442505, "epoch": 609, "memory": 36824, "step": 190504}
{"lr": 0.0002367627930706271, "data_time": 0.0033588647842407227, "grad_norm": 0.29753045439720155, "loss": 0.4804630964994431, "time": 1.7963139057159423, "epoch": 609, "memory": 36824, "step": 190604}
{"lr": 0.0002359155826744198, "data_time": 0.047684526443481444, "grad_norm": 0.30852157771587374, "loss": 0.47486184239387513, "time": 1.8039947509765626, "epoch": 610, "memory": 36824, "step": 190717}
{"lr": 0.00023516688669855765, "data_time": 0.003323078155517578, "grad_norm": 0.3126950979232788, "loss": 0.47315114736557007, "time": 2.0427138090133665, "epoch": 610, "memory": 36824, "step": 190817}
{"lr": 0.00023441917587634867, "data_time": 0.0035892486572265624, "grad_norm": 0.31138505041599274, "loss": 0.47663348317146303, "time": 1.8212825059890747, "epoch": 610, "memory": 36824, "step": 190917}
{"lr": 0.00023357544986779717, "data_time": 0.21319990158081054, "grad_norm": 0.3308696299791336, "loss": 0.4731507241725922, "time": 1.840300965309143, "epoch": 611, "memory": 36824, "step": 191030}
{"lr": 0.00023282984176792573, "data_time": 0.0025144338607788084, "grad_norm": 0.28462739288806915, "loss": 0.47640368342399597, "time": 1.7711353063583375, "epoch": 611, "memory": 36824, "step": 191130}
{"lr": 0.00023208522289786345, "data_time": 0.0032490253448486327, "grad_norm": 0.31354714930057526, "loss": 0.4770177274942398, "time": 1.840053391456604, "epoch": 611, "memory": 36824, "step": 191230}
{"lr": 0.0002312449956939177, "data_time": 0.00328216552734375, "grad_norm": 0.3066390842199326, "loss": 0.4725232064723969, "time": 1.7810472249984741, "epoch": 612, "memory": 36824, "step": 191343}
{"lr": 0.00023050248821042185, "data_time": 0.002908778190612793, "grad_norm": 0.30239268839359285, "loss": 0.4759441465139389, "time": 2.107394576072693, "epoch": 612, "memory": 36824, "step": 191443}
{"lr": 0.00022976097401598884, "data_time": 0.0030465126037597656, "grad_norm": 0.2919520139694214, "loss": 0.4743117868900299, "time": 1.73076651096344, "epoch": 612, "memory": 36824, "step": 191543}
{"lr": 0.0002289242599738138, "data_time": 0.16448700428009033, "grad_norm": 0.30570127367973327, "loss": 0.47181028723716734, "time": 1.8448846340179443, "epoch": 613, "memory": 36824, "step": 191656}
{"lr": 0.00022818486579409775, "data_time": 0.0026385307312011717, "grad_norm": 0.2817329794168472, "loss": 0.47656905055046084, "time": 1.8458088159561157, "epoch": 613, "memory": 36824, "step": 191756}
{"lr": 0.00022744646894572575, "data_time": 0.0037122488021850584, "grad_norm": 0.2963722199201584, "loss": 0.4724539309740067, "time": 1.8218360424041748, "epoch": 613, "memory": 36824, "step": 191856}
{"lr": 0.00022661328236245527, "data_time": 0.41362130641937256, "grad_norm": 0.32446772754192355, "loss": 0.4764247596263885, "time": 1.8444961309432983, "epoch": 614, "memory": 36824, "step": 191969}
{"lr": 0.0002258770141207254, "data_time": 0.0024956703186035157, "grad_norm": 0.31647963672876356, "loss": 0.4741975486278534, "time": 1.9793991565704345, "epoch": 614, "memory": 36824, "step": 192069}
{"lr": 0.00022514174723557901, "data_time": 0.0030487537384033202, "grad_norm": 0.3447292119264603, "loss": 0.47235398888587954, "time": 1.8742182016372682, "epoch": 614, "memory": 36824, "step": 192169}
{"lr": 0.00022431210234807313, "data_time": 0.3226155281066895, "grad_norm": 0.33403985798358915, "loss": 0.4720468044281006, "time": 1.8800223827362061, "epoch": 615, "memory": 36824, "step": 192282}
{"lr": 0.0002235789726251224, "data_time": 0.003005719184875488, "grad_norm": 0.30740230083465575, "loss": 0.4753202676773071, "time": 1.870794129371643, "epoch": 615, "memory": 36824, "step": 192382}
{"lr": 0.0002228468482668842, "data_time": 0.003092503547668457, "grad_norm": 0.2865526631474495, "loss": 0.4756351739168167, "time": 1.8022660493850708, "epoch": 615, "memory": 36824, "step": 192482}
{"lr": 0.00022202075925148527, "data_time": 0.15467891693115235, "grad_norm": 0.29848394691944125, "loss": 0.47766093015670774, "time": 1.8019522190093995, "epoch": 616, "memory": 36824, "step": 192595}
{"lr": 0.00022129078057447804, "data_time": 0.0031015157699584963, "grad_norm": 0.27739334404468535, "loss": 0.4740611106157303, "time": 1.815595841407776, "epoch": 616, "memory": 36824, "step": 192695}
{"lr": 0.00022056181125313317, "data_time": 0.003559446334838867, "grad_norm": 0.2923599034547806, "loss": 0.47279457151889803, "time": 1.7885435581207276, "epoch": 616, "memory": 36824, "step": 192795}
{"lr": 0.000219739292225423, "data_time": 0.09197814464569092, "grad_norm": 0.31277879178524015, "loss": 0.4752418488264084, "time": 1.842319393157959, "epoch": 617, "memory": 36824, "step": 192908}
{"lr": 0.00021901247706768045, "data_time": 0.002818870544433594, "grad_norm": 0.33447813540697097, "loss": 0.47567870914936067, "time": 1.819825792312622, "epoch": 617, "memory": 36824, "step": 193008}
{"lr": 0.00021828667523930353, "data_time": 0.0037782907485961912, "grad_norm": 0.3006740093231201, "loss": 0.472241672873497, "time": 1.8094460010528564, "epoch": 617, "memory": 36824, "step": 193108}
{"lr": 0.0002174677402538627, "data_time": 0.03563737869262695, "grad_norm": 0.28898624181747434, "loss": 0.4751291394233704, "time": 1.8107136011123657, "epoch": 618, "memory": 36824, "step": 193221}
{"lr": 0.00021674410103465032, "data_time": 0.0030526638031005858, "grad_norm": 0.29365592896938325, "loss": 0.47876264750957487, "time": 1.8444268941879272, "epoch": 618, "memory": 36824, "step": 193321}
{"lr": 0.00021602147910119196, "data_time": 0.003746485710144043, "grad_norm": 0.3250339597463608, "loss": 0.47023719549179077, "time": 1.7696313858032227, "epoch": 618, "memory": 36824, "step": 193421}
{"lr": 0.00021520614215136009, "data_time": 0.05596790313720703, "grad_norm": 0.3063710659742355, "loss": 0.47186183333396914, "time": 1.8556304216384887, "epoch": 619, "memory": 36824, "step": 193534}
{"lr": 0.0002144856912356761, "data_time": 0.0025673151016235352, "grad_norm": 0.3070303827524185, "loss": 0.47626285552978515, "time": 1.8618226289749145, "epoch": 619, "memory": 36824, "step": 193634}
{"lr": 0.0002137662615447514, "data_time": 0.002966141700744629, "grad_norm": 0.31822085082530976, "loss": 0.4780005246400833, "time": 1.787357997894287, "epoch": 619, "memory": 36824, "step": 193734}
{"lr": 0.00021295453656238725, "data_time": 0.324293851852417, "grad_norm": 0.3148320823907852, "loss": 0.4762276470661163, "time": 1.7969334363937377, "epoch": 620, "memory": 36824, "step": 193847}
{"lr": 0.00021223728626075004, "data_time": 0.0026970624923706053, "grad_norm": 0.2902941033244133, "loss": 0.47836938202381135, "time": 1.8811174392700196, "epoch": 620, "memory": 36824, "step": 193947}
{"lr": 0.000211521061105427, "data_time": 0.0036039829254150392, "grad_norm": 0.27993905991315843, "loss": 0.4719769835472107, "time": 1.8580979585647583, "epoch": 620, "memory": 36824, "step": 194047}
{"lr": 0.0002107129619606708, "data_time": 0.07696976661682128, "grad_norm": 0.31308797299861907, "loss": 0.47594418525695803, "time": 1.860374879837036, "epoch": 621, "memory": 36824, "step": 194160}
{"lr": 0.000209998924528909, "data_time": 0.002886533737182617, "grad_norm": 0.30598402917385104, "loss": 0.47776550948619845, "time": 1.8481396913528443, "epoch": 621, "memory": 36824, "step": 194260}
{"lr": 0.00020928591614749966, "data_time": 0.0031485795974731446, "grad_norm": 0.3241616070270538, "loss": 0.4766607373952866, "time": 1.8621478080749512, "epoch": 621, "memory": 36824, "step": 194360}
{"lr": 0.0002084814566485363, "data_time": 0.3474376440048218, "grad_norm": 0.28715801537036895, "loss": 0.4757765710353851, "time": 1.887923240661621, "epoch": 622, "memory": 36824, "step": 194473}
{"lr": 0.00020777064428757918, "data_time": 0.0030806779861450194, "grad_norm": 0.2964883536100388, "loss": 0.4738389730453491, "time": 1.7743882417678833, "epoch": 622, "memory": 36824, "step": 194573}
{"lr": 0.00020706086486343002, "data_time": 0.0032088756561279297, "grad_norm": Infinity, "loss": 0.4717843770980835, "time": 1.831091284751892, "epoch": 622, "memory": 36824, "step": 194673}
{"lr": 0.00020626005875625303, "data_time": 0.07367703914642335, "grad_norm": 0.2914549559354782, "loss": 0.4736895650625229, "time": 1.7720197439193726, "epoch": 623, "memory": 36824, "step": 194786}
{"lr": 0.00020555248361192274, "data_time": 0.0027803659439086916, "grad_norm": 0.30446104109287264, "loss": 0.4681916475296021, "time": 1.8610310077667236, "epoch": 623, "memory": 36824, "step": 194886}
{"lr": 0.00020484594527320503, "data_time": 0.003911256790161133, "grad_norm": 0.3181255877017975, "loss": 0.4743686139583588, "time": 1.8637824296951293, "epoch": 623, "memory": 36824, "step": 194986}
{"lr": 0.000204048806241381, "data_time": 0.060497283935546875, "grad_norm": 0.345497864484787, "loss": 0.47344210743904114, "time": 1.8665513277053833, "epoch": 624, "memory": 36824, "step": 195099}
{"lr": 0.0002033444804041845, "data_time": 0.0027051448822021486, "grad_norm": 0.2981741547584534, "loss": 0.4772574007511139, "time": 1.7535142183303833, "epoch": 624, "memory": 36824, "step": 195199}
{"lr": 0.0002026411952236896, "data_time": 0.002968430519104004, "grad_norm": 0.34246608316898347, "loss": 0.4752033591270447, "time": 1.853580689430237, "epoch": 624, "memory": 36824, "step": 195299}
{"lr": 0.00020184773688812512, "data_time": 0.21620936393737794, "grad_norm": 0.297745069861412, "loss": 0.47422710955142977, "time": 1.8851194381713867, "epoch": 625, "memory": 36824, "step": 195412}
{"lr": 0.0002011466723930479, "data_time": 0.0029117822647094726, "grad_norm": 0.305534827709198, "loss": 0.4790799617767334, "time": 1.8608591556549072, "epoch": 625, "memory": 36824, "step": 195512}
{"lr": 0.00020044665238797837, "data_time": 0.003306126594543457, "grad_norm": 0.3227638602256775, "loss": 0.4728388965129852, "time": 1.805067253112793, "epoch": 625, "memory": 36824, "step": 195612}
{"lr": 0.0001996568883066874, "data_time": 0.002765798568725586, "grad_norm": 0.2921551138162613, "loss": 0.47520368099212645, "time": 1.9041582107543946, "epoch": 626, "memory": 36824, "step": 195725}
{"lr": 0.00019895909713298695, "data_time": 0.0031733274459838866, "grad_norm": 0.31960135996341704, "loss": 0.47379235327243807, "time": 1.8322558641433715, "epoch": 626, "memory": 36824, "step": 195825}
{"lr": 0.00019826235426475395, "data_time": 0.0034548044204711914, "grad_norm": 0.2968972161412239, "loss": 0.4745349705219269, "time": 1.8061171531677247, "epoch": 626, "memory": 36824, "step": 195925}
{"lr": 0.0001974762979326254, "data_time": 0.0026510953903198242, "grad_norm": 0.2892356812953949, "loss": 0.47463343739509584, "time": 1.8322593450546265, "epoch": 627, "memory": 36824, "step": 196038}
{"lr": 0.00019678179200362808, "data_time": 0.0028037786483764648, "grad_norm": 0.3010362073779106, "loss": 0.4738698124885559, "time": 1.8505542993545532, "epoch": 627, "memory": 36824, "step": 196138}
{"lr": 0.00019608833817764485, "data_time": 0.00369722843170166, "grad_norm": 0.27772543877363204, "loss": 0.47549839317798615, "time": 1.8429807901382447, "epoch": 627, "memory": 36824, "step": 196238}
{"lr": 0.0001953060030262126, "data_time": 0.4652229070663452, "grad_norm": 0.2851785600185394, "loss": 0.472473731637001, "time": 1.8654666900634767, "epoch": 628, "memory": 36824, "step": 196351}
{"lr": 0.0001946147942091082, "data_time": 0.20349206924438476, "grad_norm": 0.32092070281505586, "loss": 0.4778782337903976, "time": 1.8801271200180054, "epoch": 628, "memory": 36824, "step": 196451}
{"lr": 0.00019392464127458714, "data_time": 0.0033025264739990233, "grad_norm": 0.3129555255174637, "loss": 0.47202480733394625, "time": 1.8288079977035523, "epoch": 628, "memory": 36824, "step": 196551}
{"lr": 0.00019314604067180072, "data_time": 0.1873006820678711, "grad_norm": 0.3252165406942368, "loss": 0.47449941039085386, "time": 1.8545727491378785, "epoch": 629, "memory": 36824, "step": 196664}
{"lr": 0.00019245814077744103, "data_time": 0.0026967763900756837, "grad_norm": 0.3140968233346939, "loss": 0.48090546727180483, "time": 1.8110113859176635, "epoch": 629, "memory": 36824, "step": 196764}
{"lr": 0.0001917713005271911, "data_time": 0.0029607295989990236, "grad_norm": 0.3001008540391922, "loss": 0.47429845929145814, "time": 1.7472171306610107, "epoch": 629, "memory": 36824, "step": 196864}
{"lr": 0.00019099644777718637, "data_time": 0.19825000762939454, "grad_norm": 0.28396516144275663, "loss": 0.4775762766599655, "time": 1.913338041305542, "epoch": 630, "memory": 36824, "step": 196977}
{"lr": 0.00019031186855988236, "data_time": 0.002558612823486328, "grad_norm": 0.30649955868721007, "loss": 0.47435466647148133, "time": 1.8301871061325072, "epoch": 630, "memory": 36824, "step": 197077}
{"lr": 0.000189628352730109, "data_time": 0.0030789613723754884, "grad_norm": 0.27410600185394285, "loss": 0.4751808077096939, "time": 1.7935542345046998, "epoch": 630, "memory": 36824, "step": 197177}
{"lr": 0.00018885726107298133, "data_time": 0.40078637599945066, "grad_norm": 0.29111969470977783, "loss": 0.47505388855934144, "time": 1.8224143505096435, "epoch": 631, "memory": 36824, "step": 197290}
{"lr": 0.0001881760142303033, "data_time": 0.00262303352355957, "grad_norm": 0.31534308195114136, "loss": 0.4747704595327377, "time": 1.8172211170196533, "epoch": 631, "memory": 36824, "step": 197390}
{"lr": 0.00018749583450040604, "data_time": 0.0032683610916137695, "grad_norm": 0.29643296003341674, "loss": 0.4751777082681656, "time": 1.8090659141540528, "epoch": 631, "memory": 36824, "step": 197490}
{"lr": 0.00018672851711198482, "data_time": 0.6177837371826171, "grad_norm": 0.32356018424034116, "loss": 0.4740719169378281, "time": 2.1613845586776734, "epoch": 632, "memory": 36824, "step": 197603}
{"lr": 0.00018605061428456136, "data_time": 0.0025447607040405273, "grad_norm": 0.2902654528617859, "loss": 0.472701296210289, "time": 1.7678899765014648, "epoch": 632, "memory": 36824, "step": 197703}
{"lr": 0.00018537378227693525, "data_time": 0.00319063663482666, "grad_norm": 0.2950261414051056, "loss": 0.47455944716930387, "time": 1.8429217100143434, "epoch": 632, "memory": 36824, "step": 197803}
{"lr": 0.000184610252268557, "data_time": 0.003182125091552734, "grad_norm": 0.32121773064136505, "loss": 0.47038642764091493, "time": 1.7873602390289307, "epoch": 633, "memory": 36824, "step": 197916}
{"lr": 0.00018393570503987718, "data_time": 0.002824831008911133, "grad_norm": 0.2896991789340973, "loss": 0.47345936596393584, "time": 1.8340781927108765, "epoch": 633, "memory": 36824, "step": 198016}
{"lr": 0.0001832622323197135, "data_time": 0.0038633346557617188, "grad_norm": 0.29541209191083906, "loss": 0.47665382623672486, "time": 1.849971318244934, "epoch": 633, "memory": 36824, "step": 198116}
{"lr": 0.000182502502738, "data_time": 0.18650519847869873, "grad_norm": 0.3049191117286682, "loss": 0.4740696221590042, "time": 1.9348751783370972, "epoch": 634, "memory": 36824, "step": 198229}
{"lr": 0.0001818313226342149, "data_time": 0.0026787757873535157, "grad_norm": 0.3145563393831253, "loss": 0.47514501214027405, "time": 1.8394196033477783, "epoch": 634, "memory": 36824, "step": 198329}
{"lr": 0.0001811612207093043, "data_time": 0.0029465913772583007, "grad_norm": 0.3019929677248001, "loss": 0.4746652692556381, "time": 1.846458077430725, "epoch": 634, "memory": 36824, "step": 198429}
{"lr": 0.00018040530453593764, "data_time": 0.28578801155090333, "grad_norm": 0.3120181977748871, "loss": 0.47450836896896365, "time": 1.7980427026748658, "epoch": 635, "memory": 36824, "step": 198542}
{"lr": 0.00017973750302566354, "data_time": 0.0025942325592041016, "grad_norm": 0.31616735756397246, "loss": 0.4696860760450363, "time": 1.8223750114440918, "epoch": 635, "memory": 36824, "step": 198642}
{"lr": 0.000179070783346199, "data_time": 0.003029537200927734, "grad_norm": 0.30482002794742585, "loss": 0.4701546549797058, "time": 1.8098297119140625, "epoch": 635, "memory": 36824, "step": 198742}
{"lr": 0.0001783186934977008, "data_time": 0.1594240665435791, "grad_norm": 0.2994598835706711, "loss": 0.4735113024711609, "time": 2.026412105560303, "epoch": 636, "memory": 36824, "step": 198855}
{"lr": 0.00017765428199182254, "data_time": 0.0028560400009155274, "grad_norm": 0.3021433025598526, "loss": 0.47223593592643737, "time": 1.7459732055664063, "epoch": 636, "memory": 36824, "step": 198955}
{"lr": 0.00017699095595020433, "data_time": 0.003649783134460449, "grad_norm": 0.3056428521871567, "loss": 0.4712088406085968, "time": 2.0993613958358766, "epoch": 636, "memory": 36824, "step": 199055}
{"lr": 0.00017624270527771517, "data_time": 0.19648342132568358, "grad_norm": 0.3137287825345993, "loss": 0.4756107985973358, "time": 1.77586772441864, "epoch": 637, "memory": 36824, "step": 199168}
{"lr": 0.0001755816951291922, "data_time": 0.0024905681610107424, "grad_norm": 0.3329882174730301, "loss": 0.4772282987833023, "time": 1.818321204185486, "epoch": 637, "memory": 36824, "step": 199268}
{"lr": 0.00017492177405983175, "data_time": 0.002975654602050781, "grad_norm": 0.2874630942940712, "loss": 0.47234258353710173, "time": 1.760209822654724, "epoch": 637, "memory": 36824, "step": 199368}
{"lr": 0.00017417737534889107, "data_time": 0.171569561958313, "grad_norm": 0.30464988946914673, "loss": 0.4782411128282547, "time": 1.8982207298278808, "epoch": 638, "memory": 36824, "step": 199481}
{"lr": 0.00017351977785256336, "data_time": 0.0028539657592773437, "grad_norm": 0.30575437247753146, "loss": 0.47142356634140015, "time": 1.7738937139511108, "epoch": 638, "memory": 36824, "step": 199581}
{"lr": 0.000172863273031691, "data_time": 0.0030758857727050783, "grad_norm": 0.32431466281414034, "loss": 0.4742100328207016, "time": 1.8105138063430786, "epoch": 638, "memory": 36824, "step": 199681}
{"lr": 0.00017212273900201906, "data_time": 0.3444568872451782, "grad_norm": 0.2987838536500931, "loss": 0.47349960505962374, "time": 1.801423692703247, "epoch": 639, "memory": 36824, "step": 199794}
{"lr": 0.00017146856539441356, "data_time": 0.0028159379959106444, "grad_norm": 0.31853888630867006, "loss": 0.474225053191185, "time": 1.8311555624008178, "epoch": 639, "memory": 36824, "step": 199894}
{"lr": 0.00017081548803988509, "data_time": 0.004020333290100098, "grad_norm": 0.29231452345848086, "loss": 0.47556007504463194, "time": 1.8128407001495361, "epoch": 639, "memory": 36824, "step": 199994}
{"lr": 0.00017007883134516517, "data_time": 0.18096764087677003, "grad_norm": 0.29728239923715594, "loss": 0.4769182145595551, "time": 1.8816840887069701, "epoch": 640, "memory": 36824, "step": 200107}
{"lr": 0.00016942809280430377, "data_time": 0.0031661748886108398, "grad_norm": 0.32133913040161133, "loss": 0.47080217599868773, "time": 1.8203115224838258, "epoch": 640, "memory": 36824, "step": 200207}
{"lr": 0.0001687784540754097, "data_time": 0.0028627872467041015, "grad_norm": 0.3160906195640564, "loss": 0.4755733847618103, "time": 2.0585058689117433, "epoch": 640, "memory": 36824, "step": 200307}
{"lr": 0.00016804568730307217, "data_time": 0.19533014297485352, "grad_norm": 0.2982688039541245, "loss": 0.4753247439861298, "time": 1.7614830493927003, "epoch": 641, "memory": 36824, "step": 200420}
{"lr": 0.00016739839494828095, "data_time": 0.0027581214904785155, "grad_norm": 0.29265737533569336, "loss": 0.4727259069681168, "time": 1.8150407791137695, "epoch": 641, "memory": 36824, "step": 200520}
{"lr": 0.0001667522059455546, "data_time": 0.003744626045227051, "grad_norm": 0.3221758842468262, "loss": 0.47286067605018617, "time": 1.78363037109375, "epoch": 641, "memory": 36824, "step": 200620}
{"lr": 0.00016602334161656179, "data_time": 0.5705456733703613, "grad_norm": 0.31396077424287794, "loss": 0.475114643573761, "time": 2.0397088289260865, "epoch": 642, "memory": 36824, "step": 200733}
{"lr": 0.00016537950650828093, "data_time": 0.0027529001235961914, "grad_norm": 0.33195365965366364, "loss": 0.47220989465713503, "time": 1.7880728244781494, "epoch": 642, "memory": 36824, "step": 200833}
{"lr": 0.00016473677827330945, "data_time": 0.0032170772552490234, "grad_norm": 0.29236013889312745, "loss": 0.47801203429698946, "time": 1.7830426454544068, "epoch": 642, "memory": 36824, "step": 200933}
{"lr": 0.00016401182884194168, "data_time": 0.12098078727722168, "grad_norm": 0.29187384247779846, "loss": 0.47006079852581023, "time": 1.792660641670227, "epoch": 643, "memory": 36824, "step": 201046}
{"lr": 0.00016337146198153754, "data_time": 0.0026273012161254885, "grad_norm": 0.2884869158267975, "loss": 0.47031748592853545, "time": 1.8039719581604003, "epoch": 643, "memory": 36824, "step": 201146}
{"lr": 0.00016273220549677293, "data_time": 0.0030761003494262696, "grad_norm": 0.32005818784236906, "loss": 0.4758721262216568, "time": 1.8754586696624755, "epoch": 643, "memory": 36824, "step": 201246}
{"lr": 0.00016201118335041592, "data_time": 0.32852151393890383, "grad_norm": 0.3250639259815216, "loss": 0.47109549939632417, "time": 1.9102858543395995, "epoch": 644, "memory": 36824, "step": 201359}
{"lr": 0.00016137429567999084, "data_time": 0.002808690071105957, "grad_norm": 0.32931326031684877, "loss": 0.4759499907493591, "time": 1.8037264823913575, "epoch": 644, "memory": 36824, "step": 201459}
{"lr": 0.00016073852186856285, "data_time": 0.0032249212265014647, "grad_norm": 0.322348353266716, "loss": 0.4769628942012787, "time": 1.88587486743927, "epoch": 644, "memory": 36824, "step": 201559}
{"lr": 0.00016002143932749557, "data_time": 0.0028211116790771485, "grad_norm": 0.3028994739055634, "loss": 0.47258876264095306, "time": 1.821504545211792, "epoch": 645, "memory": 36824, "step": 201672}
{"lr": 0.00015938804172970275, "data_time": 0.002657914161682129, "grad_norm": 0.2996906816959381, "loss": 0.4768958806991577, "time": 1.8569782972335815, "epoch": 645, "memory": 36824, "step": 201772}
{"lr": 0.00015875576145523186, "data_time": 0.003096151351928711, "grad_norm": 0.2962862700223923, "loss": 0.4725840538740158, "time": 1.9397557020187377, "epoch": 645, "memory": 36824, "step": 201872}
{"lr": 0.00015804263077241653, "data_time": 0.16685452461242675, "grad_norm": 0.3074440896511078, "loss": 0.4743272840976715, "time": 2.0236074686050416, "epoch": 646, "memory": 36824, "step": 201985}
{"lr": 0.0001574127340702729, "data_time": 0.0030414581298828123, "grad_norm": 0.3103379011154175, "loss": 0.474485245347023, "time": 1.765410017967224, "epoch": 646, "memory": 36824, "step": 202085}
{"lr": 0.00015678395813668432, "data_time": 0.00320889949798584, "grad_norm": 0.30664020478725434, "loss": 0.47429331839084626, "time": 1.8749161958694458, "epoch": 646, "memory": 36824, "step": 202185}
{"lr": 0.00015607479149755686, "data_time": 0.10010652542114258, "grad_norm": 0.33375281393527984, "loss": 0.47302969098091124, "time": 1.8266450881958007, "epoch": 647, "memory": 36824, "step": 202298}
{"lr": 0.00015544840645425933, "data_time": 0.002608990669250488, "grad_norm": 0.31656442284584047, "loss": 0.4745232701301575, "time": 1.858039665222168, "epoch": 647, "memory": 36824, "step": 202398}
{"lr": 0.00015482314560559874, "data_time": 0.004088354110717773, "grad_norm": 0.2917275369167328, "loss": 0.4737079381942749, "time": 1.8189257860183716, "epoch": 647, "memory": 36824, "step": 202498}
{"lr": 0.0001541179551278602, "data_time": 0.07913832664489746, "grad_norm": 0.3289179801940918, "loss": 0.4761052966117859, "time": 1.8560450077056885, "epoch": 648, "memory": 36824, "step": 202611}
{"lr": 0.00015349509244660147, "data_time": 0.0027841567993164063, "grad_norm": 0.31581319570541383, "loss": 0.47386662364006044, "time": 1.8690467357635498, "epoch": 648, "memory": 36824, "step": 202711}
{"lr": 0.0001528733573668517, "data_time": 0.0031480312347412108, "grad_norm": 0.3117305487394333, "loss": 0.4764531672000885, "time": 1.8487690925598144, "epoch": 648, "memory": 36824, "step": 202811}
{"lr": 0.00015217215510026171, "data_time": 0.14922010898590088, "grad_norm": 0.30582537949085237, "loss": 0.4750628411769867, "time": 1.858227825164795, "epoch": 649, "memory": 36824, "step": 202924}
{"lr": 0.00015155282542404541, "data_time": 0.0033565759658813477, "grad_norm": 0.2865491405129433, "loss": 0.4751128524541855, "time": 1.8747323989868163, "epoch": 649, "memory": 36824, "step": 203024}
{"lr": 0.0001509346267369435, "data_time": 0.003571963310241699, "grad_norm": 0.3163466781377792, "loss": 0.4739485740661621, "time": 1.9013137340545654, "epoch": 649, "memory": 36824, "step": 203124}
{"lr": 0.00015023742466311395, "data_time": 0.49373056888580324, "grad_norm": 0.30498053431510924, "loss": 0.475729364156723, "time": 1.825570011138916, "epoch": 650, "memory": 36824, "step": 203237}
{"lr": 0.00014962163857457612, "data_time": 0.002805471420288086, "grad_norm": 0.3103015720844269, "loss": 0.4706206083297729, "time": 1.875476360321045, "epoch": 650, "memory": 36824, "step": 203337}
{"lr": 0.00014900698684343217, "data_time": 0.004143452644348145, "grad_norm": 0.2836254209280014, "loss": 0.4724904507398605, "time": 1.9317134618759155, "epoch": 650, "memory": 36824, "step": 203437}
{"lr": 0.0001483137968756229, "data_time": 0.0026844501495361327, "grad_norm": 0.3015568405389786, "loss": 0.4735854297876358, "time": 1.8407560110092163, "epoch": 651, "memory": 36824, "step": 203550}
{"lr": 0.0001477015648968486, "data_time": 0.0027041912078857424, "grad_norm": 0.3032393127679825, "loss": 0.47438215017318724, "time": 1.932504153251648, "epoch": 651, "memory": 36824, "step": 203650}
{"lr": 0.0001470904706243648, "data_time": 0.00291748046875, "grad_norm": 0.282848584651947, "loss": 0.4694345235824585, "time": 1.8055185794830322, "epoch": 651, "memory": 36824, "step": 203750}
{"lr": 0.00014640130460727996, "data_time": 0.2311389923095703, "grad_norm": 0.30390847027301787, "loss": 0.47311113476753236, "time": 1.837848687171936, "epoch": 652, "memory": 36824, "step": 203863}
{"lr": 0.00014579263719962466, "data_time": 0.0029772043228149412, "grad_norm": 0.29216999411582945, "loss": 0.47341895699501035, "time": 1.924198579788208, "epoch": 652, "memory": 36824, "step": 203963}
{"lr": 0.00014518511082771578, "data_time": 0.003697037696838379, "grad_norm": 0.3127431243658066, "loss": 0.47389532029628756, "time": 1.832518219947815, "epoch": 652, "memory": 36824, "step": 204063}
{"lr": 0.00014449998053730117, "data_time": 0.018751764297485353, "grad_norm": 0.28977112770080565, "loss": 0.4777149260044098, "time": 1.8343530178070069, "epoch": 653, "memory": 36824, "step": 204176}
{"lr": 0.00014389488810121165, "data_time": 0.0029548883438110353, "grad_norm": 0.3154391050338745, "loss": 0.473826539516449, "time": 1.727133846282959, "epoch": 653, "memory": 36824, "step": 204276}
{"lr": 0.00014329094001082706, "data_time": 0.0037474870681762696, "grad_norm": 0.2982613518834114, "loss": 0.4719293475151062, "time": 1.8358423471450807, "epoch": 653, "memory": 36824, "step": 204376}
{"lr": 0.000142609857154069, "data_time": 0.24335970878601074, "grad_norm": 0.2896295666694641, "loss": 0.47703427970409396, "time": 1.8124092102050782, "epoch": 654, "memory": 36824, "step": 204489}
{"lr": 0.00014200835002890503, "data_time": 0.0027739286422729494, "grad_norm": 0.3123367041349411, "loss": 0.47290776669979095, "time": 1.7765000104904174, "epoch": 654, "memory": 36824, "step": 204589}
{"lr": 0.00014140799053985193, "data_time": 0.003981781005859375, "grad_norm": 0.3543441116809845, "loss": 0.47949694395065307, "time": 1.8290741920471192, "epoch": 654, "memory": 36824, "step": 204689}
{"lr": 0.00014073096675457715, "data_time": 0.12069041728973388, "grad_norm": 0.29713232070207596, "loss": 0.4729625016450882, "time": 1.840447688102722, "epoch": 655, "memory": 36824, "step": 204802}
{"lr": 0.00014013305521843588, "data_time": 0.0024771690368652344, "grad_norm": 0.3023084163665771, "loss": 0.47403441071510316, "time": 1.813390564918518, "epoch": 655, "memory": 36824, "step": 204902}
{"lr": 0.000139536294589201, "data_time": 0.0028934240341186523, "grad_norm": 0.29563224613666533, "loss": 0.46814086139202116, "time": 1.7447372198104858, "epoch": 655, "memory": 36824, "step": 205002}
{"lr": 0.0001388633414438777, "data_time": 0.16259143352508545, "grad_norm": 0.3074131593108177, "loss": 0.4718369871377945, "time": 1.8160830974578857, "epoch": 656, "memory": 36824, "step": 205115}
{"lr": 0.0001382690357134179, "data_time": 0.0026029348373413086, "grad_norm": 0.30444861948490143, "loss": 0.4715700685977936, "time": 1.7708466053009033, "epoch": 656, "memory": 36824, "step": 205215}
{"lr": 0.00013767588414099395, "data_time": 0.0028128862380981446, "grad_norm": 0.2877089500427246, "loss": 0.47267990112304686, "time": 1.916139578819275, "epoch": 656, "memory": 36824, "step": 205315}
{"lr": 0.000137007013134535, "data_time": 0.4359637022018433, "grad_norm": 0.30091617703437806, "loss": 0.4712859094142914, "time": 1.7924073457717895, "epoch": 657, "memory": 36824, "step": 205428}
{"lr": 0.00013641632336480178, "data_time": 0.0026528120040893556, "grad_norm": 0.3158330976963043, "loss": 0.47713049948215486, "time": 1.781122875213623, "epoch": 657, "memory": 36824, "step": 205528}
{"lr": 0.00013582679098451242, "data_time": 0.0029118537902832033, "grad_norm": 0.31952554285526275, "loss": 0.47924443781375886, "time": 1.7382744550704956, "epoch": 657, "memory": 36824, "step": 205628}
{"lr": 0.00013516201354607795, "data_time": 0.002607893943786621, "grad_norm": 0.3156108736991882, "loss": 0.4747291475534439, "time": 1.7981610298156738, "epoch": 658, "memory": 36824, "step": 205741}
{"lr": 0.00013457494983033, "data_time": 0.0025823354721069337, "grad_norm": 0.3063901364803314, "loss": 0.47558199167251586, "time": 2.052316975593567, "epoch": 658, "memory": 36824, "step": 205841}
{"lr": 0.00013398904671565703, "data_time": 0.003595280647277832, "grad_norm": 0.3062672048807144, "loss": 0.47064791023731234, "time": 1.7553563356399535, "epoch": 658, "memory": 36824, "step": 205941}
{"lr": 0.0001333283742044591, "data_time": 0.0027738571166992187, "grad_norm": 0.32206123173236845, "loss": 0.4745903968811035, "time": 1.8171891212463378, "epoch": 659, "memory": 36824, "step": 206054}
{"lr": 0.00013274494657399504, "data_time": 0.0029247522354125975, "grad_norm": 0.31297558844089507, "loss": 0.47129151225090027, "time": 1.819279718399048, "epoch": 659, "memory": 36824, "step": 206154}
{"lr": 0.00013216268273640665, "data_time": 0.0032727956771850587, "grad_norm": 0.30413919389247895, "loss": 0.47404618561267853, "time": 1.8915310859680177, "epoch": 659, "memory": 36824, "step": 206254}
{"lr": 0.00013150612644151566, "data_time": 0.003151273727416992, "grad_norm": 0.302306105196476, "loss": 0.47523796558380127, "time": 1.820749044418335, "epoch": 660, "memory": 36824, "step": 206367}
{"lr": 0.00013092634486550422, "data_time": 0.002687811851501465, "grad_norm": 0.2977712780237198, "loss": 0.4695093870162964, "time": 1.8258233785629272, "epoch": 660, "memory": 36824, "step": 206467}
{"lr": 0.00013034773025428248, "data_time": 0.0031546592712402345, "grad_norm": 0.3404166907072067, "loss": 0.47074671983718874, "time": 1.7083713054656982, "epoch": 660, "memory": 36824, "step": 206567}
{"lr": 0.0001296953013944338, "data_time": 0.002807474136352539, "grad_norm": 0.2933575794100761, "loss": 0.47346227169036864, "time": 1.8694430112838745, "epoch": 661, "memory": 36824, "step": 206680}
{"lr": 0.00012911917577974254, "data_time": 0.002736496925354004, "grad_norm": 0.3060747101902962, "loss": 0.4738771319389343, "time": 1.8804242134094238, "epoch": 661, "memory": 36824, "step": 206780}
{"lr": 0.00012854422028181478, "data_time": 0.0030898571014404295, "grad_norm": 0.30295397341251373, "loss": 0.4758871555328369, "time": 1.8126241207122802, "epoch": 661, "memory": 36824, "step": 206880}
{"lr": 0.00012789593000521807, "data_time": 0.07273039817810059, "grad_norm": 0.2979686200618744, "loss": 0.47506031692028045, "time": 1.7838010311126709, "epoch": 662, "memory": 36824, "step": 206993}
{"lr": 0.00012732347019624401, "data_time": 0.002627849578857422, "grad_norm": 0.3096700370311737, "loss": 0.4738494336605072, "time": 1.8964423656463623, "epoch": 662, "memory": 36824, "step": 207093}
{"lr": 0.0001267521836360131, "data_time": 0.0033156871795654297, "grad_norm": 0.30716646611690523, "loss": 0.475055131316185, "time": 1.8776905298233033, "epoch": 662, "memory": 36824, "step": 207193}
{"lr": 0.00012610804302016, "data_time": 0.0025318145751953127, "grad_norm": 0.3023876041173935, "loss": 0.4701804518699646, "time": 1.808153009414673, "epoch": 663, "memory": 36824, "step": 207306}
{"lr": 0.0001255392587986617, "data_time": 0.002556133270263672, "grad_norm": 0.30733791291713713, "loss": 0.4703994274139404, "time": 1.8111982345581055, "epoch": 663, "memory": 36824, "step": 207406}
{"lr": 0.0001249716509378388, "data_time": 0.0035651683807373046, "grad_norm": 0.30038760006427767, "loss": 0.4725243657827377, "time": 1.847658371925354, "epoch": 663, "memory": 36824, "step": 207506}
{"lr": 0.00012433167098931534, "data_time": 0.15882012844085694, "grad_norm": 0.31030943989753723, "loss": 0.47196817994117735, "time": 1.7769967317581177, "epoch": 664, "memory": 36824, "step": 207619}
{"lr": 0.00012376657207424584, "data_time": 0.0027471065521240236, "grad_norm": 0.3099379241466522, "loss": 0.4749846816062927, "time": 1.7436572551727294, "epoch": 664, "memory": 36824, "step": 207719}
{"lr": 0.00012320265261168365, "data_time": 0.0037487268447875975, "grad_norm": 0.2887796819210052, "loss": 0.4772818386554718, "time": 1.8420674800872803, "epoch": 664, "memory": 36824, "step": 207819}
{"lr": 0.0001225668442659813, "data_time": 0.26747152805328367, "grad_norm": 0.29397324919700624, "loss": 0.47280151546001437, "time": 1.8583252668380736, "epoch": 665, "memory": 36824, "step": 207932}
{"lr": 0.00012200544031332197, "data_time": 0.0028016090393066405, "grad_norm": 0.30356102883815766, "loss": 0.47300688922405243, "time": 1.8466769456863403, "epoch": 665, "memory": 36824, "step": 208032}
{"lr": 0.00012144521888484765, "data_time": 0.0036272525787353514, "grad_norm": 0.3265569120645523, "loss": 0.4718507081270218, "time": 1.803205680847168, "epoch": 665, "memory": 36824, "step": 208132}
{"lr": 0.00012081359300617678, "data_time": 0.023693418502807616, "grad_norm": 0.31789224296808244, "loss": 0.477204304933548, "time": 1.8434193849563598, "epoch": 666, "memory": 36824, "step": 208245}
{"lr": 0.00012025589360877231, "data_time": 0.002486419677734375, "grad_norm": 0.2910173416137695, "loss": 0.472900328040123, "time": 1.7809263944625855, "epoch": 666, "memory": 36824, "step": 208345}
{"lr": 0.00011969937978702459, "data_time": 0.003606104850769043, "grad_norm": 0.3045668125152588, "loss": 0.4697530597448349, "time": 1.7428390026092528, "epoch": 666, "memory": 36824, "step": 208445}
{"lr": 0.00011907194716812888, "data_time": 0.15387158393859862, "grad_norm": 0.31422743648290635, "loss": 0.4752723515033722, "time": 1.7795210361480713, "epoch": 667, "memory": 36824, "step": 208558}
{"lr": 0.00011851796185552329, "data_time": 0.002660846710205078, "grad_norm": 0.34931998550891874, "loss": 0.4734093129634857, "time": 1.796107029914856, "epoch": 667, "memory": 36824, "step": 208658}
{"lr": 0.00011796516514978782, "data_time": 0.0030443429946899413, "grad_norm": 0.31676324903965, "loss": 0.4740662187337875, "time": 1.7762393474578857, "epoch": 667, "memory": 36824, "step": 208758}
{"lr": 0.0001173419365117597, "data_time": 0.20785458087921144, "grad_norm": 0.3013989686965942, "loss": 0.472921085357666, "time": 1.7286445140838622, "epoch": 668, "memory": 36824, "step": 208871}
{"lr": 0.00011679167475003337, "data_time": 0.002786684036254883, "grad_norm": 0.299802628159523, "loss": 0.47182817161083224, "time": 1.7548691272735595, "epoch": 668, "memory": 36824, "step": 208971}
{"lr": 0.00011624260460608089, "data_time": 0.0031219482421875, "grad_norm": 0.31014516949653625, "loss": 0.47598993182182314, "time": 1.878311562538147, "epoch": 668, "memory": 36824, "step": 209071}
{"lr": 0.0001156235905981779, "data_time": 0.002754354476928711, "grad_norm": 0.3126907765865326, "loss": 0.46731488704681395, "time": 1.8139209747314453, "epoch": 669, "memory": 36824, "step": 209184}
{"lr": 0.00011507706178978589, "data_time": 0.0025914907455444336, "grad_norm": 0.309361669421196, "loss": 0.4738673150539398, "time": 1.877459168434143, "epoch": 669, "memory": 36824, "step": 209284}
{"lr": 0.00011453172758971014, "data_time": 0.0029889345169067383, "grad_norm": 0.3107818871736526, "loss": 0.4738821655511856, "time": 1.7876594543457032, "epoch": 669, "memory": 36824, "step": 209384}
{"lr": 0.00011391693878917393, "data_time": 0.16030645370483398, "grad_norm": 0.2725107178092003, "loss": 0.4727612018585205, "time": 1.8031945466995238, "epoch": 670, "memory": 36824, "step": 209497}
{"lr": 0.00011337415227278584, "data_time": 0.0030834197998046873, "grad_norm": 0.3169411331415176, "loss": 0.4751159280538559, "time": 1.8096658706665039, "epoch": 670, "memory": 36824, "step": 209597}
{"lr": 0.00011283256333484354, "data_time": 0.0035106897354125976, "grad_norm": 0.31247812509536743, "loss": 0.4713335782289505, "time": 1.8140312910079956, "epoch": 670, "memory": 36824, "step": 209697}
{"lr": 0.00011222201024671863, "data_time": 0.3024137020111084, "grad_norm": 0.2934888705611229, "loss": 0.47271865606307983, "time": 1.7921809434890748, "epoch": 671, "memory": 36824, "step": 209810}
{"lr": 0.00011168297529705856, "data_time": 0.0031240224838256837, "grad_norm": 0.298826065659523, "loss": 0.47352599501609804, "time": 1.7946572065353394, "epoch": 671, "memory": 36824, "step": 209910}
{"lr": 0.00011114514087551027, "data_time": 0.0033092737197875977, "grad_norm": 0.30522445142269133, "loss": 0.4703816741704941, "time": 1.8190698385238648, "epoch": 671, "memory": 36824, "step": 210010}
{"lr": 0.00011053883393246405, "data_time": 0.10208041667938232, "grad_norm": 0.3136069685220718, "loss": 0.47468971610069277, "time": 1.819126796722412, "epoch": 672, "memory": 36824, "step": 210123}
{"lr": 0.00011000355976015198, "data_time": 0.0026321172714233398, "grad_norm": 0.3262016952037811, "loss": 0.47478780150413513, "time": 1.806643533706665, "epoch": 672, "memory": 36824, "step": 210223}
{"lr": 0.00010946948904510406, "data_time": 0.003000354766845703, "grad_norm": 0.3266823798418045, "loss": 0.4750372678041458, "time": 1.8549508810043336, "epoch": 672, "memory": 36824, "step": 210323}
{"lr": 0.00010886743860724941, "data_time": 0.12416059970855713, "grad_norm": 0.3321630507707596, "loss": 0.476904296875, "time": 1.7762478828430175, "epoch": 673, "memory": 36824, "step": 210436}
{"lr": 0.00010833593435864411, "data_time": 0.06710395812988282, "grad_norm": 0.3095499753952026, "loss": 0.47634672522544863, "time": 1.9063703775405885, "epoch": 673, "memory": 36824, "step": 210536}
{"lr": 0.00010780563647589184, "data_time": 0.0036419391632080077, "grad_norm": 0.3275571882724762, "loss": 0.4758023709058762, "time": 1.811669945716858, "epoch": 673, "memory": 36824, "step": 210636}
{"lr": 0.00010720785283060986, "data_time": 0.0025558948516845705, "grad_norm": 0.3108784407377243, "loss": 0.4735637426376343, "time": 1.7870940685272216, "epoch": 674, "memory": 36824, "step": 210749}
{"lr": 0.00010668012758765203, "data_time": 0.002657628059387207, "grad_norm": 0.32346122711896896, "loss": 0.4797893077135086, "time": 1.8197893857955934, "epoch": 674, "memory": 36824, "step": 210849}
{"lr": 0.00010615361159852299, "data_time": 0.0029889345169067383, "grad_norm": 0.3096051335334778, "loss": 0.4738381773233414, "time": 1.7761969804763793, "epoch": 674, "memory": 36824, "step": 210949}
{"lr": 0.00010556010496028645, "data_time": 0.2991339921951294, "grad_norm": 0.28561015278100965, "loss": 0.4715727299451828, "time": 1.8452610731124879, "epoch": 675, "memory": 36824, "step": 211062}
{"lr": 0.00010503616774034484, "data_time": 0.0027955293655395506, "grad_norm": 0.31138553023338317, "loss": 0.4745787590742111, "time": 1.8468926191329955, "epoch": 675, "memory": 36824, "step": 211162}
{"lr": 0.00010451344264154434, "data_time": 0.003610682487487793, "grad_norm": 0.3253671020269394, "loss": 0.4701852321624756, "time": 1.7897868871688842, "epoch": 675, "memory": 36824, "step": 211262}
{"lr": 0.00010392422315174433, "data_time": 0.2934067487716675, "grad_norm": 0.3085592985153198, "loss": 0.47179358303546903, "time": 1.842760157585144, "epoch": 676, "memory": 36824, "step": 211375}
{"lr": 0.00010340408290746053, "data_time": 0.002708697319030762, "grad_norm": 0.33059864640235903, "loss": 0.4721727192401886, "time": 1.8700274229049683, "epoch": 676, "memory": 36824, "step": 211475}
{"lr": 0.00010288515763091823, "data_time": 0.0030349016189575194, "grad_norm": 0.3179481357336044, "loss": 0.4743629962205887, "time": 1.8029887914657592, "epoch": 676, "memory": 36824, "step": 211575}
{"lr": 0.00010230023535769006, "data_time": 0.002542924880981445, "grad_norm": 0.3063301905989647, "loss": 0.4719454854726791, "time": 1.8245620727539062, "epoch": 677, "memory": 36824, "step": 211688}
{"lr": 0.00010178390097682578, "data_time": 0.0025374650955200194, "grad_norm": 0.3217034816741943, "loss": 0.47423783242702483, "time": 1.7973289966583252, "epoch": 677, "memory": 36824, "step": 211788}
{"lr": 0.00010126878438954332, "data_time": 0.003134918212890625, "grad_norm": 0.2936802566051483, "loss": 0.4753934949636459, "time": 2.011400556564331, "epoch": 677, "memory": 36824, "step": 211888}
{"lr": 0.00010068816932759498, "data_time": 0.38620638847351074, "grad_norm": 0.3043934106826782, "loss": 0.4747942924499512, "time": 1.8585419416427613, "epoch": 678, "memory": 36824, "step": 212001}
{"lr": 0.00010017564963287958, "data_time": 0.002620410919189453, "grad_norm": 0.3116029918193817, "loss": 0.47824531197547915, "time": 1.8389647245407104, "epoch": 678, "memory": 36824, "step": 212101}
{"lr": 9.96643505367779e-05, "data_time": 0.004027652740478516, "grad_norm": 0.30229227244853973, "loss": 0.47016549706459043, "time": 1.846093440055847, "epoch": 678, "memory": 36824, "step": 212201}
{"lr": 9.908805260721871e-05, "data_time": 0.18329708576202391, "grad_norm": 0.2974013686180115, "loss": 0.4746151268482208, "time": 1.9257128715515137, "epoch": 679, "memory": 36824, "step": 212314}
{"lr": 9.857935635620003e-05, "data_time": 0.002688002586364746, "grad_norm": 0.31260557770729064, "loss": 0.4712506353855133, "time": 1.8372167110443116, "epoch": 679, "memory": 36824, "step": 212414}
{"lr": 9.807188348796995e-05, "data_time": 0.0038730144500732423, "grad_norm": 0.28852247148752214, "loss": 0.4741341799497604, "time": 1.994618797302246, "epoch": 679, "memory": 36824, "step": 212514}
{"lr": 9.749991253814144e-05, "data_time": 0.10316758155822754, "grad_norm": 0.31989677548408507, "loss": 0.4756148487329483, "time": 1.827200675010681, "epoch": 680, "memory": 36824, "step": 212627}
{"lr": 9.699504842303518e-05, "data_time": 0.0027963399887084963, "grad_norm": 0.3121533691883087, "loss": 0.4772143870592117, "time": 1.7615434885025025, "epoch": 680, "memory": 36824, "step": 212727}
{"lr": 9.649141045398762e-05, "data_time": 0.003168797492980957, "grad_norm": 0.3006356358528137, "loss": 0.47211564481258395, "time": 1.8408377170562744, "epoch": 680, "memory": 36824, "step": 212827}
{"lr": 9.592377625729538e-05, "data_time": 0.054692554473876956, "grad_norm": 0.2802845433354378, "loss": 0.47317915558815005, "time": 1.7886029958724976, "epoch": 681, "memory": 36824, "step": 212940}
{"lr": 9.542275290483597e-05, "data_time": 0.0026578426361083983, "grad_norm": 0.3097956866025925, "loss": 0.47260584533214567, "time": 1.851561141014099, "epoch": 681, "memory": 36824, "step": 213040}
{"lr": 9.492295844075459e-05, "data_time": 0.0030576229095458985, "grad_norm": 0.29923728108406067, "loss": 0.47596212923526765, "time": 1.895541214942932, "epoch": 681, "memory": 36824, "step": 213140}
{"lr": 9.435967069650053e-05, "data_time": 0.1692530632019043, "grad_norm": 0.29557699263095855, "loss": 0.47286154627799987, "time": 1.8040055274963378, "epoch": 682, "memory": 36824, "step": 213253}
{"lr": 9.386249666779495e-05, "data_time": 0.002646660804748535, "grad_norm": 0.31172277629375456, "loss": 0.4712201803922653, "time": 1.8025595903396607, "epoch": 682, "memory": 36824, "step": 213353}
{"lr": 9.336655424878813e-05, "data_time": 0.0029307842254638673, "grad_norm": 0.3120175123214722, "loss": 0.47289707958698274, "time": 1.7744555711746215, "epoch": 682, "memory": 36824, "step": 213453}
{"lr": 9.280762258200598e-05, "data_time": 0.0026815652847290037, "grad_norm": 0.315747532248497, "loss": 0.4729601353406906, "time": 1.8349954605102539, "epoch": 683, "memory": 36824, "step": 213566}
{"lr": 9.231430637238616e-05, "data_time": 0.0026932954788208008, "grad_norm": 0.33336140811443327, "loss": 0.4774166405200958, "time": 1.8637020349502564, "epoch": 683, "memory": 36824, "step": 213666}
{"lr": 9.182222447274146e-05, "data_time": 0.0035593271255493163, "grad_norm": 0.28673578053712845, "loss": 0.4785132944583893, "time": 1.9472365140914918, "epoch": 683, "memory": 36824, "step": 213766}
{"lr": 9.126765843403107e-05, "data_time": 0.16436281204223632, "grad_norm": 0.3046416401863098, "loss": 0.47130605280399324, "time": 1.7495047330856324, "epoch": 684, "memory": 36824, "step": 213879}
{"lr": 9.077820847291019e-05, "data_time": 0.003491377830505371, "grad_norm": 0.29331842064857483, "loss": 0.47494221329689024, "time": 1.8369031429290772, "epoch": 684, "memory": 36824, "step": 213979}
{"lr": 9.02899955009495e-05, "data_time": 0.003110337257385254, "grad_norm": 0.28345032036304474, "loss": 0.4693427175283432, "time": 1.946219277381897, "epoch": 684, "memory": 36824, "step": 214079}
{"lr": 8.973980456631434e-05, "data_time": 0.06832973957061768, "grad_norm": 0.3279388576745987, "loss": 0.48001262843608855, "time": 1.8134299516677856, "epoch": 685, "memory": 36824, "step": 214192}
{"lr": 8.925422921704195e-05, "data_time": 0.002764296531677246, "grad_norm": 0.3362912118434906, "loss": 0.47424852252006533, "time": 1.8583620071411133, "epoch": 685, "memory": 36824, "step": 214292}
{"lr": 8.876989351497787e-05, "data_time": 0.003002643585205078, "grad_norm": 0.3041488379240036, "loss": 0.47396515905857084, "time": 2.1169689893722534, "epoch": 685, "memory": 36824, "step": 214392}
{"lr": 8.822408708566297e-05, "data_time": 0.0027493953704833983, "grad_norm": 0.2977932780981064, "loss": 0.4722468912601471, "time": 1.86841561794281, "epoch": 686, "memory": 36824, "step": 214505}
{"lr": 8.77423946453819e-05, "data_time": 0.0028403282165527345, "grad_norm": 0.2936068058013916, "loss": 0.47290127277374266, "time": 1.8619806051254273, "epoch": 686, "memory": 36824, "step": 214605}
{"lr": 8.726194448917525e-05, "data_time": 0.0036535263061523438, "grad_norm": 0.30554460138082506, "loss": 0.47315876483917235, "time": 1.887220811843872, "epoch": 686, "memory": 36824, "step": 214705}
{"lr": 8.672053189150679e-05, "data_time": 0.4190415143966675, "grad_norm": 0.31736008524894715, "loss": 0.4725931644439697, "time": 2.0426433086395264, "epoch": 687, "memory": 36824, "step": 214818}
{"lr": 8.6242730591012e-05, "data_time": 0.002831172943115234, "grad_norm": 0.2996438562870026, "loss": 0.4704306572675705, "time": 1.823663353919983, "epoch": 687, "memory": 36824, "step": 214918}
{"lr": 8.576617419023075e-05, "data_time": 0.0034075021743774415, "grad_norm": 0.3080689698457718, "loss": 0.47286407351493837, "time": 2.15624794960022, "epoch": 687, "memory": 36824, "step": 215018}
{"lr": 8.5229164675456e-05, "data_time": 0.4473786592483521, "grad_norm": 0.3185748726129532, "loss": 0.4791255295276642, "time": 1.8235040426254272, "epoch": 688, "memory": 36824, "step": 215131}
{"lr": 8.475526267905285e-05, "data_time": 0.0025244235992431642, "grad_norm": 0.3115665167570114, "loss": 0.47250594198703766, "time": 1.8341876983642578, "epoch": 688, "memory": 36824, "step": 215231}
{"lr": 8.428260817673073e-05, "data_time": 0.003028607368469238, "grad_norm": 0.31271296739578247, "loss": 0.4739011824131012, "time": 1.760098433494568, "epoch": 688, "memory": 36824, "step": 215331}
{"lr": 8.37500109208607e-05, "data_time": 0.003004002571105957, "grad_norm": 0.2966232568025589, "loss": 0.47430633306503295, "time": 1.7971359729766845, "epoch": 689, "memory": 36824, "step": 215444}
{"lr": 8.328001632622732e-05, "data_time": 0.0027297258377075194, "grad_norm": 0.3001160860061646, "loss": 0.4684647679328918, "time": 1.8339586019515992, "epoch": 689, "memory": 36824, "step": 215544}
{"lr": 8.281127179872548e-05, "data_time": 0.002968120574951172, "grad_norm": 0.3023535966873169, "loss": 0.47523884773254393, "time": 2.0330881834030152, "epoch": 689, "memory": 36824, "step": 215644}
{"lr": 8.228309590237785e-05, "data_time": 0.45357418060302734, "grad_norm": 0.2949416011571884, "loss": 0.47474717199802396, "time": 1.8081424236297607, "epoch": 690, "memory": 36824, "step": 215757}
{"lr": 8.18170167404251e-05, "data_time": 0.0028555631637573243, "grad_norm": 0.3098583698272705, "loss": 0.46776449382305146, "time": 1.7998478651046752, "epoch": 690, "memory": 36824, "step": 215857}
{"lr": 8.135219019729439e-05, "data_time": 0.0035890817642211916, "grad_norm": 0.30573690533638, "loss": 0.47038509547710416, "time": 1.8983953475952149, "epoch": 690, "memory": 36824, "step": 215957}
{"lr": 8.082844468553786e-05, "data_time": 0.12916193008422852, "grad_norm": 0.3005295157432556, "loss": 0.4711187303066254, "time": 1.795183038711548, "epoch": 691, "memory": 36824, "step": 216070}
{"lr": 8.036628892027245e-05, "data_time": 0.002533626556396484, "grad_norm": 0.3343293100595474, "loss": 0.4731505364179611, "time": 1.8104777574539184, "epoch": 691, "memory": 36824, "step": 216170}
{"lr": 7.990538830411608e-05, "data_time": 0.003263258934020996, "grad_norm": 0.27405169457197187, "loss": 0.4730924069881439, "time": 1.9269425868988037, "epoch": 691, "memory": 36824, "step": 216270}
{"lr": 7.938608212631634e-05, "data_time": 0.1745044469833374, "grad_norm": 0.3087982326745987, "loss": 0.47461954951286317, "time": 1.8821003198623658, "epoch": 692, "memory": 36824, "step": 216383}
{"lr": 7.892785765470494e-05, "data_time": 0.00260014533996582, "grad_norm": 0.3089463412761688, "loss": 0.4749273002147675, "time": 1.8232414245605468, "epoch": 692, "memory": 36824, "step": 216483}
{"lr": 7.847089084104268e-05, "data_time": 0.003747367858886719, "grad_norm": 0.2845922619104385, "loss": 0.476184207201004, "time": 1.7920209646224976, "epoch": 692, "memory": 36824, "step": 216583}
{"lr": 7.79560328707097e-05, "data_time": 0.2836976766586304, "grad_norm": 0.315461540222168, "loss": 0.4667642891407013, "time": 1.8126460790634156, "epoch": 693, "memory": 36824, "step": 216696}
{"lr": 7.750174752254456e-05, "data_time": 0.0028957128524780273, "grad_norm": 0.300964480638504, "loss": 0.4728469878435135, "time": 2.039708685874939, "epoch": 693, "memory": 36824, "step": 216796}
{"lr": 7.704872231967858e-05, "data_time": 0.0029144048690795898, "grad_norm": 0.32580062448978425, "loss": 0.46932546198368075, "time": 1.7567654132843018, "epoch": 693, "memory": 36824, "step": 216896}
{"lr": 7.653832135431446e-05, "data_time": 0.11288263797760009, "grad_norm": 0.2943672776222229, "loss": 0.4715318292379379, "time": 1.8306147575378418, "epoch": 694, "memory": 36824, "step": 217009}
{"lr": 7.608798289207888e-05, "data_time": 0.0026632070541381834, "grad_norm": 0.33881796300411227, "loss": 0.47286897897720337, "time": 1.819309163093567, "epoch": 694, "memory": 36824, "step": 217109}
{"lr": 7.563890704095955e-05, "data_time": 0.003891468048095703, "grad_norm": 0.29383167922496795, "loss": 0.479257196187973, "time": 1.8427496671676635, "epoch": 694, "memory": 36824, "step": 217209}
{"lr": 7.513297180190839e-05, "data_time": 0.00270998477935791, "grad_norm": 0.3290811121463776, "loss": 0.4767166405916214, "time": 1.7964011669158935, "epoch": 695, "memory": 36824, "step": 217322}
{"lr": 7.468658792064391e-05, "data_time": 0.00293879508972168, "grad_norm": 0.31916446089744566, "loss": 0.4748733162879944, "time": 1.8373010158538818, "epoch": 695, "memory": 36824, "step": 217422}
{"lr": 7.424146909473878e-05, "data_time": 0.002991008758544922, "grad_norm": 0.31854703426361086, "loss": 0.47275751233100893, "time": 1.859328556060791, "epoch": 695, "memory": 36824, "step": 217522}
{"lr": 7.374000822703809e-05, "data_time": 0.1621256113052368, "grad_norm": 0.30887313187122345, "loss": 0.4761280000209808, "time": 1.8036559104919434, "epoch": 696, "memory": 36824, "step": 217635}
{"lr": 7.329758655421376e-05, "data_time": 0.0029262304306030273, "grad_norm": 0.3098453372716904, "loss": 0.473709374666214, "time": 1.7935530185699462, "epoch": 696, "memory": 36824, "step": 217735}
{"lr": 7.28564323593756e-05, "data_time": 0.003072786331176758, "grad_norm": 0.294536779820919, "loss": 0.47608314752578734, "time": 1.8908170938491822, "epoch": 696, "memory": 36824, "step": 217835}
{"lr": 7.235945443160804e-05, "data_time": 0.003005695343017578, "grad_norm": 0.30074447840452195, "loss": 0.475117427110672, "time": 1.8207035303115844, "epoch": 697, "memory": 36824, "step": 217948}
{"lr": 7.192100252698935e-05, "data_time": 0.002993607521057129, "grad_norm": 0.29707302898168564, "loss": 0.4703975677490234, "time": 1.8431047916412353, "epoch": 697, "memory": 36824, "step": 218048}
{"lr": 7.148382050132596e-05, "data_time": 0.0034293174743652345, "grad_norm": 0.2915648505091667, "loss": 0.4748701065778732, "time": 1.7704232931137085, "epoch": 697, "memory": 36824, "step": 218148}
{"lr": 7.09913340054727e-05, "data_time": 0.2688169479370117, "grad_norm": 0.31363431811332704, "loss": 0.47369067668914794, "time": 1.7602289915084839, "epoch": 698, "memory": 36824, "step": 218261}
{"lr": 7.055685936099315e-05, "data_time": 0.002896022796630859, "grad_norm": 0.3156668871641159, "loss": 0.4683173894882202, "time": 1.8513676643371582, "epoch": 698, "memory": 36824, "step": 218361}
{"lr": 7.012365697473908e-05, "data_time": 0.003830838203430176, "grad_norm": 0.3209004342556, "loss": 0.476772540807724, "time": 1.8272600889205932, "epoch": 698, "memory": 36824, "step": 218461}
{"lr": 6.96356703260358e-05, "data_time": 0.17777571678161622, "grad_norm": 0.28483876287937165, "loss": 0.4718594282865524, "time": 1.8025316953659059, "epoch": 699, "memory": 36824, "step": 218574}
{"lr": 6.920518036566846e-05, "data_time": 0.0027202367782592773, "grad_norm": 0.3080409899353981, "loss": 0.4721007406711578, "time": 1.7728964805603027, "epoch": 699, "memory": 36824, "step": 218674}
{"lr": 6.877596502105725e-05, "data_time": 0.0034433603286743164, "grad_norm": 0.30611965656280515, "loss": 0.4733205378055573, "time": 1.7357508897781373, "epoch": 699, "memory": 36824, "step": 218774}
{"lr": 6.829248655784965e-05, "data_time": 0.002665114402770996, "grad_norm": 0.30099600553512573, "loss": 0.47751557230949404, "time": 1.8171424627304078, "epoch": 700, "memory": 36824, "step": 218887}
{"lr": 6.786598863748e-05, "data_time": 0.0028947114944458006, "grad_norm": 0.3014196306467056, "loss": 0.4691933631896973, "time": 1.8203403234481812, "epoch": 700, "memory": 36824, "step": 218987}
{"lr": 6.744076766861763e-05, "data_time": 0.0037069320678710938, "grad_norm": 0.30889086425304413, "loss": 0.4768616259098053, "time": 1.8647231578826904, "epoch": 700, "memory": 36824, "step": 219087}
{"lr": 6.69618056522185e-05, "data_time": 0.0036035776138305664, "grad_norm": 0.3113147631287575, "loss": 0.46958532333374026, "time": 1.7857830286026002, "epoch": 701, "memory": 36824, "step": 219200}
{"lr": 6.653930705951904e-05, "data_time": 0.003617095947265625, "grad_norm": 0.304305174946785, "loss": 0.4744937837123871, "time": 1.8332794666290284, "epoch": 701, "memory": 36824, "step": 219300}
{"lr": 6.611808773225896e-05, "data_time": 0.0029798507690429687, "grad_norm": 0.29047022461891175, "loss": 0.4761367619037628, "time": 1.7868510484695435, "epoch": 701, "memory": 36824, "step": 219400}
{"lr": 6.564365034680734e-05, "data_time": 0.0028351545333862305, "grad_norm": 0.28898212015628816, "loss": 0.47011578977108004, "time": 1.8427440166473388, "epoch": 702, "memory": 36824, "step": 219513}
{"lr": 6.522515830111371e-05, "data_time": 0.002515220642089844, "grad_norm": 0.2922916740179062, "loss": 0.4704956620931625, "time": 1.8188390254974365, "epoch": 702, "memory": 36824, "step": 219613}
{"lr": 6.480794781293178e-05, "data_time": 0.002978348731994629, "grad_norm": 0.2945931315422058, "loss": 0.4737714737653732, "time": 1.7862988710403442, "epoch": 702, "memory": 36824, "step": 219713}
{"lr": 6.433804316525377e-05, "data_time": 0.08712568283081054, "grad_norm": 0.3195330649614334, "loss": 0.47370306551456454, "time": 1.828860092163086, "epoch": 703, "memory": 36824, "step": 219826}
{"lr": 6.392356481744037e-05, "data_time": 0.023768043518066405, "grad_norm": 0.3218999236822128, "loss": 0.4730442315340042, "time": 1.8450485229492188, "epoch": 703, "memory": 36824, "step": 219926}
{"lr": 6.35103702973125e-05, "data_time": 0.14042012691497802, "grad_norm": 0.287247970700264, "loss": 0.47444759011268617, "time": 1.8392735719680786, "epoch": 703, "memory": 36824, "step": 220026}
{"lr": 6.304500641678207e-05, "data_time": 0.3274386405944824, "grad_norm": 0.28917962610721587, "loss": 0.47447010576725007, "time": 1.8201454877853394, "epoch": 704, "memory": 36824, "step": 220139}
{"lr": 6.263454884914014e-05, "data_time": 0.15935759544372557, "grad_norm": 0.30669227689504625, "loss": 0.47167790234088897, "time": 1.786871075630188, "epoch": 704, "memory": 36824, "step": 220239}
{"lr": 6.222537735742098e-05, "data_time": 0.0039187192916870115, "grad_norm": 0.32475330531597135, "loss": 0.4682545781135559, "time": 1.8339590549468994, "epoch": 704, "memory": 36824, "step": 220339}
{"lr": 6.176456219582285e-05, "data_time": 0.012474799156188964, "grad_norm": 0.29681122601032256, "loss": 0.4720368355512619, "time": 1.8081240892410277, "epoch": 705, "memory": 36824, "step": 220452}
{"lr": 6.135813242193952e-05, "data_time": 0.0028996944427490236, "grad_norm": 0.3168660581111908, "loss": 0.4776231974363327, "time": 1.7678471565246583, "epoch": 705, "memory": 36824, "step": 220552}
{"lr": 6.095299095024082e-05, "data_time": 0.003713560104370117, "grad_norm": 0.3069883018732071, "loss": 0.47567436993122103, "time": 1.7572513580322267, "epoch": 705, "memory": 36824, "step": 220652}
{"lr": 6.0496732381634686e-05, "data_time": 0.06163766384124756, "grad_norm": 0.30924701541662214, "loss": 0.4790103226900101, "time": 1.8323010444641112, "epoch": 706, "memory": 36824, "step": 220765}
{"lr": 6.0094337346273614e-05, "data_time": 0.0028473854064941405, "grad_norm": 0.3044831484556198, "loss": 0.4777501404285431, "time": 1.7568424940109253, "epoch": 706, "memory": 36824, "step": 220865}
{"lr": 5.969323281734544e-05, "data_time": 0.003643798828125, "grad_norm": 0.29008139073848727, "loss": 0.47366714775562285, "time": 1.8500432252883912, "epoch": 706, "memory": 36824, "step": 220965}
{"lr": 5.924153863793128e-05, "data_time": 0.20281355381011962, "grad_norm": 0.28873243033885954, "loss": 0.4739267140626907, "time": 1.8508081674575805, "epoch": 707, "memory": 36824, "step": 221078}
{"lr": 5.884318521691332e-05, "data_time": 0.0026777029037475587, "grad_norm": 0.3171664610505104, "loss": 0.4728307515382767, "time": 1.836387324333191, "epoch": 707, "memory": 36824, "step": 221178}
{"lr": 5.844612448452508e-05, "data_time": 0.003136301040649414, "grad_norm": 0.31536454558372495, "loss": 0.4759592831134796, "time": 1.8558992862701416, "epoch": 707, "memory": 36824, "step": 221278}
{"lr": 5.799900241251033e-05, "data_time": 0.11370396614074707, "grad_norm": 0.3304575592279434, "loss": 0.4708417683839798, "time": 1.819195318222046, "epoch": 708, "memory": 36824, "step": 221391}
{"lr": 5.7604697412596524e-05, "data_time": 0.0028385639190673826, "grad_norm": 0.3024126052856445, "loss": 0.47523124516010284, "time": 1.756782627105713, "epoch": 708, "memory": 36824, "step": 221491}
{"lr": 5.721168726142056e-05, "data_time": 0.0033200263977050783, "grad_norm": 0.30134797394275664, "loss": 0.476899528503418, "time": 1.820308017730713, "epoch": 708, "memory": 36824, "step": 221591}
{"lr": 5.6769144936887766e-05, "data_time": 0.29051756858825684, "grad_norm": 0.3068735033273697, "loss": 0.47385685741901395, "time": 1.8202836513519287, "epoch": 709, "memory": 36824, "step": 221704}
{"lr": 5.637889509566243e-05, "data_time": 0.002800869941711426, "grad_norm": 0.31819511353969576, "loss": 0.46895510256290435, "time": 1.8634114980697631, "epoch": 709, "memory": 36824, "step": 221804}
{"lr": 5.59899422411582e-05, "data_time": 0.0031056642532348634, "grad_norm": 0.2982719302177429, "loss": 0.47318108975887296, "time": 1.7790602445602417, "epoch": 709, "memory": 36824, "step": 221904}
{"lr": 5.5551987225934625e-05, "data_time": 0.2162576675415039, "grad_norm": 0.29647159278392793, "loss": 0.47781701385974884, "time": 1.8195178508758545, "epoch": 710, "memory": 36824, "step": 222017}
{"lr": 5.5165799211690945e-05, "data_time": 0.002776432037353516, "grad_norm": 0.2847711741924286, "loss": 0.4780150234699249, "time": 1.7704687118530273, "epoch": 710, "memory": 36824, "step": 222117}
{"lr": 5.478091029998952e-05, "data_time": 0.0036605119705200194, "grad_norm": 0.29862835109233854, "loss": 0.472222700715065, "time": 1.7663715124130248, "epoch": 710, "memory": 36824, "step": 222217}
{"lr": 5.434755007751788e-05, "data_time": 0.31757540702819825, "grad_norm": 0.29380314648151395, "loss": 0.4732646644115448, "time": 1.8550270795822144, "epoch": 711, "memory": 36824, "step": 222330}
{"lr": 5.3965430489143284e-05, "data_time": 0.003870391845703125, "grad_norm": 0.31943894624710084, "loss": 0.4765037208795547, "time": 1.785346221923828, "epoch": 711, "memory": 36824, "step": 222430}
{"lr": 5.3584612096934194e-05, "data_time": 0.003755474090576172, "grad_norm": 0.290210822224617, "loss": 0.4717635989189148, "time": 1.8240823984146117, "epoch": 711, "memory": 36824, "step": 222530}
{"lr": 5.3155854072145114e-05, "data_time": 0.002739262580871582, "grad_norm": 0.2975343883037567, "loss": 0.46980424523353576, "time": 1.8520471572875976, "epoch": 712, "memory": 36824, "step": 222643}
{"lr": 5.2777809439009193e-05, "data_time": 0.002750706672668457, "grad_norm": 0.29709338694810866, "loss": 0.4748172342777252, "time": 1.7790045738220215, "epoch": 712, "memory": 36824, "step": 222743}
{"lr": 5.240106807342808e-05, "data_time": 0.0029877662658691407, "grad_norm": 0.2981231927871704, "loss": 0.4727010577917099, "time": 1.7437639474868774, "epoch": 712, "memory": 36824, "step": 222843}
{"lr": 5.1976919572613306e-05, "data_time": 0.002816510200500488, "grad_norm": 0.32824419140815736, "loss": 0.471821591258049, "time": 1.7929497241973877, "epoch": 713, "memory": 36824, "step": 222956}
{"lr": 5.1602956354455906e-05, "data_time": 0.0025710821151733398, "grad_norm": 0.30720538198947905, "loss": 0.47074310183525087, "time": 1.7175276279449463, "epoch": 713, "memory": 36824, "step": 223056}
{"lr": 5.123029845297299e-05, "data_time": 0.0034712076187133787, "grad_norm": 0.2926409006118774, "loss": 0.47317295968532563, "time": 1.8380301952362061, "epoch": 713, "memory": 36824, "step": 223156}
{"lr": 5.081076672365999e-05, "data_time": 0.34197225570678713, "grad_norm": 0.2958092957735062, "loss": 0.4710937112569809, "time": 1.8749238967895507, "epoch": 714, "memory": 36824, "step": 223269}
{"lr": 5.044089131048063e-05, "data_time": 0.0024986743927001955, "grad_norm": 0.28681476712226867, "loss": 0.47408739030361174, "time": 1.829442572593689, "epoch": 714, "memory": 36824, "step": 223369}
{"lr": 5.007232324079149e-05, "data_time": 0.003713822364807129, "grad_norm": 0.30419073402881625, "loss": 0.47032971382141114, "time": 1.729269528388977, "epoch": 714, "memory": 36824, "step": 223469}
{"lr": 4.965741545162007e-05, "data_time": 0.075034499168396, "grad_norm": 0.2924020692706108, "loss": 0.47433356046676634, "time": 1.8110060453414918, "epoch": 715, "memory": 36824, "step": 223582}
{"lr": 4.929163416356878e-05, "data_time": 0.003044390678405762, "grad_norm": 0.321834796667099, "loss": 0.46877201199531554, "time": 1.779341459274292, "epoch": 715, "memory": 36824, "step": 223682}
{"lr": 4.8927162223484816e-05, "data_time": 0.0030234575271606444, "grad_norm": 0.29638884514570235, "loss": 0.4745901584625244, "time": 1.753894853591919, "epoch": 715, "memory": 36824, "step": 223782}
{"lr": 4.851688546408439e-05, "data_time": 0.1385904312133789, "grad_norm": 0.29554948806762693, "loss": 0.46965721249580383, "time": 1.8245903491973876, "epoch": 716, "memory": 36824, "step": 223895}
{"lr": 4.815520455135445e-05, "data_time": 0.0027695178985595705, "grad_norm": 0.28913341015577315, "loss": 0.4692074567079544, "time": 1.810176634788513, "epoch": 716, "memory": 36824, "step": 223995}
{"lr": 4.7794834968695506e-05, "data_time": 0.0031651020050048827, "grad_norm": 0.27824325710535047, "loss": 0.47141886353492735, "time": 1.7554674863815307, "epoch": 716, "memory": 36824, "step": 224095}
{"lr": 4.7389196249563675e-05, "data_time": 0.019095849990844727, "grad_norm": 0.30097516626119614, "loss": 0.4691976398229599, "time": 1.8007668733596802, "epoch": 717, "memory": 36824, "step": 224208}
{"lr": 4.703162189228403e-05, "data_time": 0.0030817270278930666, "grad_norm": 0.28271358609199526, "loss": 0.47001933455467226, "time": 1.8564127445220948, "epoch": 717, "memory": 36824, "step": 224308}
{"lr": 4.66753608247717e-05, "data_time": 0.0028872966766357424, "grad_norm": 0.3164214789867401, "loss": 0.47321543395519255, "time": 1.889792013168335, "epoch": 717, "memory": 36824, "step": 224408}
{"lr": 4.6274367077155094e-05, "data_time": 0.022612524032592774, "grad_norm": 0.3028679579496384, "loss": 0.4732029944658279, "time": 1.8774818181991577, "epoch": 718, "memory": 36824, "step": 224521}
{"lr": 4.592090538528479e-05, "data_time": 0.003188824653625488, "grad_norm": 0.31601484715938566, "loss": 0.4731423258781433, "time": 1.808622145652771, "epoch": 718, "memory": 36824, "step": 224621}
{"lr": 4.556875892043796e-05, "data_time": 0.003512454032897949, "grad_norm": 0.29452978670597074, "loss": 0.46798697113990784, "time": 1.923785352706909, "epoch": 718, "memory": 36824, "step": 224721}
{"lr": 4.5172416996213364e-05, "data_time": 0.32734880447387693, "grad_norm": 0.3047907710075378, "loss": 0.4749248683452606, "time": 1.8721080780029298, "epoch": 719, "memory": 36824, "step": 224834}
{"lr": 4.4823074009437685e-05, "data_time": 0.0028430461883544923, "grad_norm": 0.33271175622940063, "loss": 0.4707968682050705, "time": 1.7513153314590455, "epoch": 719, "memory": 36824, "step": 224934}
{"lr": 4.4475048164467935e-05, "data_time": 0.003306293487548828, "grad_norm": 0.32657880783081056, "loss": 0.4754848122596741, "time": 1.7619375467300415, "epoch": 719, "memory": 36824, "step": 225034}
{"lr": 4.40833648360254e-05, "data_time": 0.1033738374710083, "grad_norm": 0.2937274187803268, "loss": 0.4758027881383896, "time": 1.791243314743042, "epoch": 720, "memory": 36824, "step": 225147}
{"lr": 4.3738146523651624e-05, "data_time": 0.0026289701461791994, "grad_norm": 0.2978406727313995, "loss": 0.4711280554533005, "time": 1.8460344791412353, "epoch": 720, "memory": 36824, "step": 225247}
{"lr": 4.3394247245360346e-05, "data_time": 0.0034445047378540037, "grad_norm": 0.3034278959035873, "loss": 0.4741125851869583, "time": 1.7885828971862794, "epoch": 720, "memory": 36824, "step": 225347}
{"lr": 4.300722920548716e-05, "data_time": 0.04213235378265381, "grad_norm": 0.2931124895811081, "loss": 0.47383167445659635, "time": 1.8070609807968139, "epoch": 721, "memory": 36824, "step": 225460}
{"lr": 4.266614146634402e-05, "data_time": 0.0030644893646240234, "grad_norm": 0.2958347946405411, "loss": 0.4741958141326904, "time": 1.8044753789901733, "epoch": 721, "memory": 36824, "step": 225560}
{"lr": 4.2326374631021e-05, "data_time": 0.003912186622619629, "grad_norm": 0.3020080864429474, "loss": 0.4733698070049286, "time": 1.7647275447845459, "epoch": 721, "memory": 36824, "step": 225660}
{"lr": 4.194402849278756e-05, "data_time": 0.03333733081817627, "grad_norm": 0.2923619210720062, "loss": 0.47498310506343844, "time": 1.7809165716171265, "epoch": 722, "memory": 36824, "step": 225773}
{"lr": 4.160707715512369e-05, "data_time": 0.0024855375289916993, "grad_norm": 0.29647051095962523, "loss": 0.4777942955493927, "time": 1.8370799779891969, "epoch": 722, "memory": 36824, "step": 225873}
{"lr": 4.1271448568446505e-05, "data_time": 0.0032598257064819338, "grad_norm": 0.29399336576461793, "loss": 0.4779144644737244, "time": 1.7940248727798462, "epoch": 722, "memory": 36824, "step": 225973}
{"lr": 4.0893780865093624e-05, "data_time": 0.13861405849456787, "grad_norm": 0.30638022124767306, "loss": 0.4759684085845947, "time": 1.848565673828125, "epoch": 723, "memory": 36824, "step": 226086}
{"lr": 4.0560971686477494e-05, "data_time": 0.0028395891189575196, "grad_norm": 0.2821140557527542, "loss": 0.46810539364814757, "time": 1.8216357469558715, "epoch": 723, "memory": 36824, "step": 226186}
{"lr": 4.022948708341287e-05, "data_time": 0.003593850135803223, "grad_norm": 0.32742986679077146, "loss": 0.47077083885669707, "time": 1.8100794315338136, "epoch": 723, "memory": 36824, "step": 226286}
{"lr": 3.985650426823935e-05, "data_time": 0.18738067150115967, "grad_norm": 0.3059907376766205, "loss": 0.4746137589216232, "time": 1.8214324474334718, "epoch": 724, "memory": 36824, "step": 226399}
{"lr": 3.95278429354615e-05, "data_time": 0.002837800979614258, "grad_norm": 0.29851922392845154, "loss": 0.4727588266134262, "time": 1.8323596239089965, "epoch": 724, "memory": 36824, "step": 226499}
{"lr": 3.9200507980166846e-05, "data_time": 0.003070569038391113, "grad_norm": 0.29461451172828673, "loss": 0.4725214093923569, "time": 1.8042321681976319, "epoch": 724, "memory": 36824, "step": 226599}
{"lr": 3.883221642641992e-05, "data_time": 0.10017518997192383, "grad_norm": 0.2927304029464722, "loss": 0.47245201766490935, "time": 1.8271758556365967, "epoch": 725, "memory": 36824, "step": 226712}
{"lr": 3.8507708555395694e-05, "data_time": 0.002660655975341797, "grad_norm": 0.3077485591173172, "loss": 0.47015564143657684, "time": 1.838918137550354, "epoch": 725, "memory": 36824, "step": 226812}
{"lr": 3.818452884112208e-05, "data_time": 0.002990269660949707, "grad_norm": 0.29093706607818604, "loss": 0.4749183565378189, "time": 1.834011960029602, "epoch": 725, "memory": 36824, "step": 226912}
{"lr": 3.78209348418884e-05, "data_time": 0.15244665145874023, "grad_norm": 0.2976014941930771, "loss": 0.47578090727329253, "time": 1.8331223249435424, "epoch": 726, "memory": 36824, "step": 227025}
{"lr": 3.750058597756212e-05, "data_time": 0.002700495719909668, "grad_norm": 0.29736418426036837, "loss": 0.47059233784675597, "time": 1.9221191167831422, "epoch": 726, "memory": 36824, "step": 227125}
{"lr": 3.718156702655914e-05, "data_time": 0.003657102584838867, "grad_norm": 0.2797944813966751, "loss": 0.47492483258247375, "time": 1.806035590171814, "epoch": 726, "memory": 36824, "step": 227225}
{"lr": 3.682267679465677e-05, "data_time": 0.1792902946472168, "grad_norm": 0.29907690584659574, "loss": 0.47465130090713503, "time": 1.8515462398529052, "epoch": 727, "memory": 36824, "step": 227338}
{"lr": 3.650649241090663e-05, "data_time": 0.002722001075744629, "grad_norm": 0.29450803995132446, "loss": 0.47860724329948423, "time": 1.7295757293701173, "epoch": 727, "memory": 36824, "step": 227438}
{"lr": 3.61916396743279e-05, "data_time": 0.0032424449920654295, "grad_norm": 0.30727121233940125, "loss": 0.47059867680072787, "time": 1.7794479370117187, "epoch": 727, "memory": 36824, "step": 227538}
{"lr": 3.583745934220081e-05, "data_time": 0.3181328058242798, "grad_norm": 0.2964560866355896, "loss": 0.47411524653434756, "time": 1.8168740272521973, "epoch": 728, "memory": 36824, "step": 227651}
{"lr": 3.5525444841745474e-05, "data_time": 0.0026656150817871093, "grad_norm": 0.31079221367835996, "loss": 0.47628320157527926, "time": 1.746568489074707, "epoch": 728, "memory": 36824, "step": 227751}
{"lr": 3.521476369955583e-05, "data_time": 0.0035079240798950194, "grad_norm": 0.3085216671228409, "loss": 0.4748837977647781, "time": 1.793796920776367, "epoch": 728, "memory": 36824, "step": 227851}
{"lr": 3.4865299319168706e-05, "data_time": 0.3971232891082764, "grad_norm": 0.30407795011997224, "loss": 0.47187464237213134, "time": 1.7819422960281373, "epoch": 729, "memory": 36824, "step": 227964}
{"lr": 3.4557460033475053e-05, "data_time": 0.002664923667907715, "grad_norm": 0.2775308951735497, "loss": 0.4735764294862747, "time": 1.855173921585083, "epoch": 729, "memory": 36824, "step": 228064}
{"lr": 3.4250955794357925e-05, "data_time": 0.0034034252166748047, "grad_norm": 0.30052571296691893, "loss": 0.47349716126918795, "time": 1.8467430830001832, "epoch": 729, "memory": 36824, "step": 228164}
{"lr": 3.3906213337092645e-05, "data_time": 0.18367524147033693, "grad_norm": 0.30433780550956724, "loss": 0.47789821624755857, "time": 1.788735604286194, "epoch": 730, "memory": 36824, "step": 228277}
{"lr": 3.36025545262849e-05, "data_time": 0.0026053905487060545, "grad_norm": 0.28612604886293413, "loss": 0.4738172680139542, "time": 1.788347554206848, "epoch": 730, "memory": 36824, "step": 228377}
{"lr": 3.3300232427551885e-05, "data_time": 0.002970123291015625, "grad_norm": 0.30634641647338867, "loss": 0.46879830956459045, "time": 1.7375944137573243, "epoch": 730, "memory": 36824, "step": 228477}
{"lr": 3.296021778410608e-05, "data_time": 0.0028665542602539064, "grad_norm": 0.3000524461269379, "loss": 0.467962446808815, "time": 1.7717095851898192, "epoch": 731, "memory": 36824, "step": 228590}
{"lr": 3.2660744636875383e-05, "data_time": 0.0031241893768310545, "grad_norm": 0.30236233174800875, "loss": 0.4697872340679169, "time": 1.728976321220398, "epoch": 731, "memory": 36824, "step": 228690}
{"lr": 3.2362609844376956e-05, "data_time": 0.0034981727600097655, "grad_norm": 0.29389245212078097, "loss": 0.4711324483156204, "time": 1.7880142211914063, "epoch": 731, "memory": 36824, "step": 228790}
{"lr": 3.202732882466258e-05, "data_time": 0.0027005910873413087, "grad_norm": 0.31127912402153013, "loss": 0.47061113119125364, "time": 1.8142854213714599, "epoch": 732, "memory": 36824, "step": 228903}
{"lr": 3.173204645817893e-05, "data_time": 0.002770066261291504, "grad_norm": 0.310611292719841, "loss": 0.4695598632097244, "time": 1.7868555307388305, "epoch": 732, "memory": 36824, "step": 229003}
{"lr": 3.143810406621625e-05, "data_time": 0.003284931182861328, "grad_norm": 0.2980633586645126, "loss": 0.4700850069522858, "time": 1.8012335538864135, "epoch": 732, "memory": 36824, "step": 229103}
{"lr": 3.110756239926099e-05, "data_time": 0.2593810796737671, "grad_norm": 0.29729351997375486, "loss": 0.47412300407886504, "time": 1.80274178981781, "epoch": 733, "memory": 36824, "step": 229216}
{"lr": 3.081647585908499e-05, "data_time": 0.0025159358978271485, "grad_norm": 0.30797581672668456, "loss": 0.4725533902645111, "time": 1.8202055931091308, "epoch": 733, "memory": 36824, "step": 229316}
{"lr": 3.052673089032269e-05, "data_time": 0.004024982452392578, "grad_norm": 0.3133771240711212, "loss": 0.47334332168102267, "time": 1.8069614171981812, "epoch": 733, "memory": 36824, "step": 229416}
{"lr": 3.020093422417167e-05, "data_time": 0.002708840370178223, "grad_norm": 0.3059378653764725, "loss": 0.471001410484314, "time": 1.7950698137283325, "epoch": 734, "memory": 36824, "step": 229529}
{"lr": 2.9914048484169172e-05, "data_time": 0.002724313735961914, "grad_norm": 0.3204192638397217, "loss": 0.4733800947666168, "time": 1.8489975690841676, "epoch": 734, "memory": 36824, "step": 229629}
{"lr": 2.962850588954917e-05, "data_time": 0.0032529115676879885, "grad_norm": 0.302182038128376, "loss": 0.4773282617330551, "time": 1.806387233734131, "epoch": 734, "memory": 36824, "step": 229729}
{"lr": 2.9307459791168385e-05, "data_time": 0.22699143886566162, "grad_norm": 0.30604521930217743, "loss": 0.47625607550144194, "time": 1.8141151189804077, "epoch": 735, "memory": 36824, "step": 229842}
{"lr": 2.9024779753425286e-05, "data_time": 0.0029835939407348634, "grad_norm": 0.28675723969936373, "loss": 0.479252889752388, "time": 1.8133344411849976, "epoch": 735, "memory": 36824, "step": 229942}
{"lr": 2.8743444412082725e-05, "data_time": 0.0035524606704711915, "grad_norm": 0.30518620312213895, "loss": 0.47284531891345977, "time": 1.7447811603546142, "epoch": 735, "memory": 36824, "step": 230042}
{"lr": 2.8427154367264343e-05, "data_time": 0.0025995969772338867, "grad_norm": 0.2860078886151314, "loss": 0.47109163999557496, "time": 1.7935578107833863, "epoch": 736, "memory": 36824, "step": 230155}
{"lr": 2.81486848620026e-05, "data_time": 0.0030449628829956055, "grad_norm": 0.2991963654756546, "loss": 0.4705408185720444, "time": 1.859977650642395, "epoch": 736, "memory": 36824, "step": 230255}
{"lr": 2.7871561581182142e-05, "data_time": 0.003057241439819336, "grad_norm": 0.2888574987649918, "loss": 0.47040770649909974, "time": 1.772696280479431, "epoch": 736, "memory": 36824, "step": 230355}
{"lr": 2.7560032994450115e-05, "data_time": 0.09273443222045899, "grad_norm": 0.29898730516433714, "loss": 0.4761824607849121, "time": 1.7977518796920777, "epoch": 737, "memory": 36824, "step": 230468}
{"lr": 2.728577877994528e-05, "data_time": 0.00268402099609375, "grad_norm": 0.30903648138046264, "loss": 0.4736680746078491, "time": 1.7625463008880615, "epoch": 737, "memory": 36824, "step": 230568}
{"lr": 2.7012872294919136e-05, "data_time": 0.003272342681884766, "grad_norm": 0.31198396980762483, "loss": 0.47339258193969724, "time": 1.772303318977356, "epoch": 737, "memory": 36824, "step": 230668}
{"lr": 2.6706110489437808e-05, "data_time": 0.026871228218078615, "grad_norm": 0.2858692079782486, "loss": 0.476216983795166, "time": 1.797777032852173, "epoch": 738, "memory": 36824, "step": 230781}
{"lr": 2.643607625193754e-05, "data_time": 0.002823615074157715, "grad_norm": 0.29444931745529174, "loss": 0.47303480207920073, "time": 1.7001095771789552, "epoch": 738, "memory": 36824, "step": 230881}
{"lr": 2.6167391225924763e-05, "data_time": 0.0030457258224487306, "grad_norm": 0.3024689644575119, "loss": 0.47649443447589873, "time": 1.7811644315719604, "epoch": 738, "memory": 36824, "step": 230981}
{"lr": 2.5865401443406793e-05, "data_time": 0.24557740688323976, "grad_norm": 0.30977822840213776, "loss": 0.4695093631744385, "time": 1.8102542638778687, "epoch": 739, "memory": 36824, "step": 231094}
{"lr": 2.5599591797051176e-05, "data_time": 0.002759718894958496, "grad_norm": 0.2975736543536186, "loss": 0.4725140154361725, "time": 1.8393715620040894, "epoch": 739, "memory": 36824, "step": 231194}
{"lr": 2.5335132821137495e-05, "data_time": 0.003232717514038086, "grad_norm": 0.2878291502594948, "loss": 0.47445051968097685, "time": 1.8104753017425537, "epoch": 739, "memory": 36824, "step": 231294}
{"lr": 2.5037920221755016e-05, "data_time": 0.30493223667144775, "grad_norm": 0.2855979219079018, "loss": 0.471745166182518, "time": 1.7773279905319215, "epoch": 740, "memory": 36824, "step": 231407}
{"lr": 2.4776339708497658e-05, "data_time": 0.0028026342391967774, "grad_norm": 0.31473067700862883, "loss": 0.474242103099823, "time": 1.8235828638076783, "epoch": 740, "memory": 36824, "step": 231507}
{"lr": 2.4516111301557174e-05, "data_time": 0.0034735679626464845, "grad_norm": 0.27814971357584, "loss": 0.47111099362373354, "time": 1.817837619781494, "epoch": 740, "memory": 36824, "step": 231607}
{"lr": 2.4223680963853538e-05, "data_time": 0.17847762107849122, "grad_norm": 0.3002644836902618, "loss": 0.47006849348545077, "time": 1.8087156057357787, "epoch": 741, "memory": 36824, "step": 231720}
{"lr": 2.3966334053383575e-05, "data_time": 0.0026430368423461916, "grad_norm": 0.2913369208574295, "loss": 0.4743963986635208, "time": 1.84498188495636, "epoch": 741, "memory": 36824, "step": 231820}
{"lr": 2.371034066200186e-05, "data_time": 0.003141450881958008, "grad_norm": 0.29669844210147855, "loss": 0.4742850959300995, "time": 1.8015607595443726, "epoch": 741, "memory": 36824, "step": 231920}
{"lr": 2.342269758280472e-05, "data_time": 0.21454758644104005, "grad_norm": 0.29398790597915647, "loss": 0.4717735856771469, "time": 1.8168401718139648, "epoch": 742, "memory": 36824, "step": 232033}
{"lr": 2.3169588672470878e-05, "data_time": 0.0028777122497558594, "grad_norm": 0.29512207508087157, "loss": 0.4709044247865677, "time": 1.7717630386352539, "epoch": 742, "memory": 36824, "step": 232133}
{"lr": 2.2917834670868738e-05, "data_time": 0.0031514644622802736, "grad_norm": 0.29133377969264984, "loss": 0.4688729405403137, "time": 1.8281183958053588, "epoch": 742, "memory": 36824, "step": 232233}
{"lr": 2.2634983765204624e-05, "data_time": 0.16949200630187988, "grad_norm": 0.28898979872465136, "loss": 0.4727130323648453, "time": 1.7749420166015626, "epoch": 743, "memory": 36824, "step": 232346}
{"lr": 2.2386117179939734e-05, "data_time": 0.0030189990997314454, "grad_norm": 0.30612666457891463, "loss": 0.47304750680923463, "time": 1.9446316719055177, "epoch": 743, "memory": 36824, "step": 232446}
{"lr": 2.213860686989834e-05, "data_time": 0.0031633615493774415, "grad_norm": 0.2987526684999466, "loss": 0.47526881098747253, "time": 1.8071271419525146, "epoch": 743, "memory": 36824, "step": 232546}
{"lr": 2.1860552970908857e-05, "data_time": 0.42696008682250974, "grad_norm": 0.32416050136089325, "loss": 0.4755345046520233, "time": 2.014144945144653, "epoch": 744, "memory": 36824, "step": 232659}
{"lr": 2.1615932963156172e-05, "data_time": 0.0027683496475219725, "grad_norm": 0.32519247829914094, "loss": 0.47571619749069216, "time": 2.104144549369812, "epoch": 744, "memory": 36824, "step": 232759}
{"lr": 2.1372670573943925e-05, "data_time": 0.002979421615600586, "grad_norm": 0.2959181696176529, "loss": 0.4751245528459549, "time": 1.790609073638916, "epoch": 744, "memory": 36824, "step": 232859}
{"lr": 2.109941843280311e-05, "data_time": 0.08044445514678955, "grad_norm": 0.2834993839263916, "loss": 0.4694936364889145, "time": 1.7846799612045288, "epoch": 745, "memory": 36824, "step": 232972}
{"lr": 2.085904918244389e-05, "data_time": 0.0030244112014770506, "grad_norm": 0.27579714059829713, "loss": 0.47526580691337583, "time": 1.8230329751968384, "epoch": 745, "memory": 36824, "step": 233072}
{"lr": 2.062003887074359e-05, "data_time": 0.003151726722717285, "grad_norm": 0.30462982058525084, "loss": 0.4731457978487015, "time": 1.8153985500335694, "epoch": 745, "memory": 36824, "step": 233172}
{"lr": 2.035159315657635e-05, "data_time": 0.2307835340499878, "grad_norm": 0.2839581251144409, "loss": 0.46851239502429964, "time": 1.8626534461975097, "epoch": 746, "memory": 36824, "step": 233285}
{"lr": 2.011547877085775e-05, "data_time": 0.0030175924301147463, "grad_norm": 0.30377250611782075, "loss": 0.475413516163826, "time": 1.9942225694656373, "epoch": 746, "memory": 36824, "step": 233385}
{"lr": 1.9880724620696406e-05, "data_time": 0.0034328699111938477, "grad_norm": 0.2871367305517197, "loss": 0.4743207603693008, "time": 1.8214022159576415, "epoch": 746, "memory": 36824, "step": 233485}
{"lr": 1.961708992049957e-05, "data_time": 0.12933726310729982, "grad_norm": 0.29053663462400436, "loss": 0.4774288684129715, "time": 1.7899303197860719, "epoch": 747, "memory": 36824, "step": 233598}
{"lr": 1.9385234433964942e-05, "data_time": 0.002916836738586426, "grad_norm": 0.30803384333848954, "loss": 0.4734085351228714, "time": 1.8205233335494995, "epoch": 747, "memory": 36824, "step": 233698}
{"lr": 1.9154740456643245e-05, "data_time": 0.003155350685119629, "grad_norm": 0.2775208130478859, "loss": 0.47195668518543243, "time": 1.7741981744766235, "epoch": 747, "memory": 36824, "step": 233798}
{"lr": 1.8895921275206365e-05, "data_time": 0.2189553499221802, "grad_norm": 0.27357606291770936, "loss": 0.4732081174850464, "time": 1.8098810195922852, "epoch": 748, "memory": 36824, "step": 233911}
{"lr": 1.866832864962618e-05, "data_time": 0.0027938127517700196, "grad_norm": 0.33481254428625107, "loss": 0.4724689513444901, "time": 1.7896366357803344, "epoch": 748, "memory": 36824, "step": 234011}
{"lr": 1.8442098783650348e-05, "data_time": 0.0029510498046875, "grad_norm": 0.2979853957891464, "loss": 0.47001748979091645, "time": 1.8248890399932862, "epoch": 748, "memory": 36824, "step": 234111}
{"lr": 1.81880995434791e-05, "data_time": 0.0027693510055541992, "grad_norm": 0.3147455483675003, "loss": 0.47627187967300416, "time": 1.811238670349121, "epoch": 749, "memory": 36824, "step": 234224}
{"lr": 1.7964773667783453e-05, "data_time": 0.003030228614807129, "grad_norm": 0.30403770208358766, "loss": 0.4733974069356918, "time": 1.7888996124267578, "epoch": 749, "memory": 36824, "step": 234324}
{"lr": 1.7742811778797493e-05, "data_time": 0.003830432891845703, "grad_norm": 0.2902797058224678, "loss": 0.4746239840984344, "time": 1.7652941942214966, "epoch": 749, "memory": 36824, "step": 234424}
{"lr": 1.749363682003811e-05, "data_time": 0.10845685005187988, "grad_norm": 0.2877010241150856, "loss": 0.4702716052532196, "time": 1.836336898803711, "epoch": 750, "memory": 36824, "step": 234537}
{"lr": 1.7274581510250107e-05, "data_time": 0.0029083967208862306, "grad_norm": 0.303039126098156, "loss": 0.47144063413143156, "time": 1.9039167642593384, "epoch": 750, "memory": 36824, "step": 234637}
{"lr": 1.7056891390970274e-05, "data_time": 0.003638744354248047, "grad_norm": 0.29372857213020326, "loss": 0.47553500831127166, "time": 1.841561460494995, "epoch": 750, "memory": 36824, "step": 234737}
{"lr": 1.6812544971335346e-05, "data_time": 0.14400124549865723, "grad_norm": 0.29010557681322097, "loss": 0.4755417346954346, "time": 1.8289947986602784, "epoch": 751, "memory": 36824, "step": 234850}
{"lr": 1.6597763970505768e-05, "data_time": 0.0027028322219848633, "grad_norm": 0.27867896258831026, "loss": 0.47440841794013977, "time": 1.771974802017212, "epoch": 751, "memory": 36824, "step": 234950}
{"lr": 1.6384349340655824e-05, "data_time": 0.0031738519668579102, "grad_norm": 0.29445366710424425, "loss": 0.4742182582616806, "time": 1.8864567041397096, "epoch": 751, "memory": 36824, "step": 235050}
{"lr": 1.6144835635351466e-05, "data_time": 0.3227304697036743, "grad_norm": 0.2947527915239334, "loss": 0.471639358997345, "time": 1.7997612476348877, "epoch": 752, "memory": 36824, "step": 235163}
{"lr": 1.5934332613495005e-05, "data_time": 0.0030359983444213866, "grad_norm": 0.27956528812646864, "loss": 0.4724617928266525, "time": 1.8074403524398803, "epoch": 752, "memory": 36824, "step": 235263}
{"lr": 1.5725197119741902e-05, "data_time": 0.003188467025756836, "grad_norm": 0.2998510211706161, "loss": 0.4729345887899399, "time": 1.8241660118103027, "epoch": 752, "memory": 36824, "step": 235363}
{"lr": 1.5490520221396525e-05, "data_time": 0.15560464859008788, "grad_norm": 0.2818588182330132, "loss": 0.47341398894786835, "time": 1.8054453134536743, "epoch": 753, "memory": 36824, "step": 235476}
{"lr": 1.5284298775429088e-05, "data_time": 0.0027390241622924803, "grad_norm": 0.285100032389164, "loss": 0.47307131588459017, "time": 1.7890799999237061, "epoch": 753, "memory": 36824, "step": 235576}
{"lr": 1.50794459913215e-05, "data_time": 0.0030758857727050783, "grad_norm": 0.2994809836149216, "loss": 0.4764771282672882, "time": 1.8265963554382325, "epoch": 753, "memory": 36824, "step": 235676}
{"lr": 1.4849609909915662e-05, "data_time": 0.21189401149749756, "grad_norm": 0.2936955153942108, "loss": 0.47192228138446807, "time": 1.7884110450744628, "epoch": 754, "memory": 36824, "step": 235789}
{"lr": 1.464767356359276e-05, "data_time": 0.003964948654174805, "grad_norm": 0.30483147501945496, "loss": 0.4780051797628403, "time": 1.8067879915237426, "epoch": 754, "memory": 36824, "step": 235889}
{"lr": 1.4447106989499453e-05, "data_time": 0.004107046127319336, "grad_norm": 0.2941994726657867, "loss": 0.4751384824514389, "time": 2.050793409347534, "epoch": 754, "memory": 36824, "step": 235989}
{"lr": 1.4222115652297843e-05, "data_time": 0.37003676891326903, "grad_norm": 0.31197524517774583, "loss": 0.47294610142707827, "time": 1.8404882669448852, "epoch": 755, "memory": 36824, "step": 236102}
{"lr": 1.4024467856154536e-05, "data_time": 0.0033836603164672852, "grad_norm": 0.2938314020633698, "loss": 0.471305125951767, "time": 1.7927058219909668, "epoch": 755, "memory": 36824, "step": 236202}
{"lr": 1.382819091920495e-05, "data_time": 0.003024435043334961, "grad_norm": 0.300480791926384, "loss": 0.4682231605052948, "time": 1.8149465084075929, "epoch": 755, "memory": 36824, "step": 236302}
{"lr": 1.3608048170688649e-05, "data_time": 0.09800910949707031, "grad_norm": 0.2823556333780289, "loss": 0.4687525898218155, "time": 1.8198177099227906, "epoch": 756, "memory": 36824, "step": 236415}
{"lr": 1.3414692301980302e-05, "data_time": 0.002720546722412109, "grad_norm": 0.28935302048921585, "loss": 0.47353227734565734, "time": 1.800430989265442, "epoch": 756, "memory": 36824, "step": 236515}
{"lr": 1.3222708356005872e-05, "data_time": 0.0035692453384399414, "grad_norm": 0.295153746008873, "loss": 0.4675372987985611, "time": 1.780786895751953, "epoch": 756, "memory": 36824, "step": 236615}
{"lr": 1.3007417957806939e-05, "data_time": 0.11286716461181641, "grad_norm": 0.29859180450439454, "loss": 0.4703030318021774, "time": 1.7359347105026246, "epoch": 757, "memory": 36824, "step": 236728}
{"lr": 1.2818357320451957e-05, "data_time": 0.002927684783935547, "grad_norm": 0.2976333349943161, "loss": 0.4691959947347641, "time": 1.8242239475250244, "epoch": 757, "memory": 36824, "step": 236828}
{"lr": 1.2630669645928485e-05, "data_time": 0.0032622575759887694, "grad_norm": 0.302423819899559, "loss": 0.47594558596611025, "time": 1.8269949913024903, "epoch": 757, "memory": 36824, "step": 236928}
{"lr": 1.2420235276766036e-05, "data_time": 0.14923906326293945, "grad_norm": 0.2882069110870361, "loss": 0.47411068081855773, "time": 1.8096154928207397, "epoch": 758, "memory": 36824, "step": 237041}
{"lr": 1.2235473101289046e-05, "data_time": 0.0027741193771362305, "grad_norm": 0.288833923637867, "loss": 0.47066753208637235, "time": 1.8192268371582032, "epoch": 758, "memory": 36824, "step": 237141}
{"lr": 1.205208490528101e-05, "data_time": 0.0041915178298950195, "grad_norm": 0.29558136165142057, "loss": 0.47327702641487124, "time": 1.8574021339416504, "epoch": 758, "memory": 36824, "step": 237241}
{"lr": 1.1846510160897676e-05, "data_time": 0.17372655868530273, "grad_norm": 0.30047703087329863, "loss": 0.4708143979310989, "time": 1.8057645797729491, "epoch": 759, "memory": 36824, "step": 237354}
{"lr": 1.1666049604374502e-05, "data_time": 0.0027527809143066406, "grad_norm": 0.2852308824658394, "loss": 0.47223952114582063, "time": 1.809016489982605, "epoch": 759, "memory": 36824, "step": 237454}
{"lr": 1.1486964020480152e-05, "data_time": 0.0039823055267333984, "grad_norm": 0.29148172289133073, "loss": 0.47442675232887266, "time": 1.7621505498886108, "epoch": 759, "memory": 36824, "step": 237554}
{"lr": 1.1286252413581171e-05, "data_time": 0.0028093576431274412, "grad_norm": 0.282387912273407, "loss": 0.4711599975824356, "time": 1.840365219116211, "epoch": 760, "memory": 36824, "step": 237667}
{"lr": 1.1110096559584801e-05, "data_time": 0.003068995475769043, "grad_norm": 0.2918977618217468, "loss": 0.4727849572896957, "time": 1.7905531644821167, "epoch": 760, "memory": 36824, "step": 237767}
{"lr": 1.0935316647882763e-05, "data_time": 0.0030395030975341798, "grad_norm": 0.29205926358699796, "loss": 0.4699918806552887, "time": 1.8250601053237916, "epoch": 760, "memory": 36824, "step": 237867}
{"lr": 1.0739471608075671e-05, "data_time": 0.23288316726684571, "grad_norm": 0.28351337015628814, "loss": 0.47090044915676116, "time": 1.833602237701416, "epoch": 761, "memory": 36824, "step": 237980}
{"lr": 1.056762346662366e-05, "data_time": 0.0027352571487426758, "grad_norm": 0.2939088433980942, "loss": 0.47313923239707945, "time": 1.8622727155685426, "epoch": 761, "memory": 36824, "step": 238080}
{"lr": 1.0397152213620411e-05, "data_time": 0.003219151496887207, "grad_norm": 0.29820266664028167, "loss": 0.47877146005630494, "time": 1.8145380020141602, "epoch": 761, "memory": 36824, "step": 238180}
{"lr": 1.0206177087356582e-05, "data_time": 0.1540010690689087, "grad_norm": 0.28285880833864213, "loss": 0.47389099895954134, "time": 1.8163347005844117, "epoch": 762, "memory": 36824, "step": 238293}
{"lr": 1.0038639594859427e-05, "data_time": 0.002947568893432617, "grad_norm": 0.2918893963098526, "loss": 0.4735269367694855, "time": 1.802617335319519, "epoch": 762, "memory": 36824, "step": 238393}
{"lr": 9.872479913438264e-06, "data_time": 0.003081512451171875, "grad_norm": 0.3145481586456299, "loss": 0.4763170123100281, "time": 1.7773759841918946, "epoch": 762, "memory": 36824, "step": 238493}
{"lr": 9.686377963955749e-06, "data_time": 0.07732152938842773, "grad_norm": 0.27199389934539797, "loss": 0.4683741867542267, "time": 1.831181573867798, "epoch": 763, "memory": 36824, "step": 238606}
{"lr": 9.52315398316703e-06, "data_time": 0.002531266212463379, "grad_norm": 0.29692926853895185, "loss": 0.4723546028137207, "time": 1.797152853012085, "epoch": 763, "memory": 36824, "step": 238706}
{"lr": 9.361308712538401e-06, "data_time": 0.0038460254669189452, "grad_norm": 0.30891583263874056, "loss": 0.469590300321579, "time": 1.8230050086975098, "epoch": 763, "memory": 36824, "step": 238806}
{"lr": 9.180083119806016e-06, "data_time": 0.24781413078308107, "grad_norm": 0.27347083687782286, "loss": 0.47199456095695497, "time": 1.8355300188064576, "epoch": 764, "memory": 36824, "step": 238919}
{"lr": 9.021175439773132e-06, "data_time": 0.003127455711364746, "grad_norm": 0.285073122382164, "loss": 0.4728126645088196, "time": 1.865853214263916, "epoch": 764, "memory": 36824, "step": 239019}
{"lr": 8.863647345426307e-06, "data_time": 0.0036120176315307616, "grad_norm": 0.3068466275930405, "loss": 0.47341094017028806, "time": 1.813084077835083, "epoch": 764, "memory": 36824, "step": 239119}
{"lr": 8.687301206089388e-06, "data_time": 0.225801420211792, "grad_norm": 0.2930135875940323, "loss": 0.47219730019569395, "time": 1.8286972522735596, "epoch": 765, "memory": 36824, "step": 239232}
{"lr": 8.532712542106289e-06, "data_time": 0.0031660795211791992, "grad_norm": 0.29642780870199203, "loss": 0.4762172311544418, "time": 1.8675699472427367, "epoch": 765, "memory": 36824, "step": 239332}
{"lr": 8.379504315761708e-06, "data_time": 0.0029764413833618165, "grad_norm": 0.2920360043644905, "loss": 0.4726276695728302, "time": 1.7852591037750245, "epoch": 765, "memory": 36824, "step": 239432}
{"lr": 8.208040643089187e-06, "data_time": 0.014467000961303711, "grad_norm": 0.2828930675983429, "loss": 0.47535678148269656, "time": 1.7929923295974732, "epoch": 766, "memory": 36824, "step": 239545}
{"lr": 8.057773636649473e-06, "data_time": 0.002776336669921875, "grad_norm": 0.3058796152472496, "loss": 0.47581121921539304, "time": 1.8776405811309815, "epoch": 766, "memory": 36824, "step": 239645}
{"lr": 7.908887896213024e-06, "data_time": 0.003722834587097168, "grad_norm": 0.30454814434051514, "loss": 0.4760968804359436, "time": 1.768368124961853, "epoch": 766, "memory": 36824, "step": 239745}
{"lr": 7.742309620046049e-06, "data_time": 0.10002214908599853, "grad_norm": 0.2848932147026062, "loss": 0.4744962155818939, "time": 1.8081703186035156, "epoch": 767, "memory": 36824, "step": 239858}
{"lr": 7.596366838798208e-06, "data_time": 0.0026387453079223635, "grad_norm": 0.3075950711965561, "loss": 0.4696510285139084, "time": 1.7192102909088134, "epoch": 767, "memory": 36824, "step": 239958}
{"lr": 7.451806128316375e-06, "data_time": 0.003215527534484863, "grad_norm": 0.2774272680282593, "loss": 0.47700328230857847, "time": 1.7669742822647094, "epoch": 767, "memory": 36824, "step": 240058}
{"lr": 7.2901160950182e-06, "data_time": 0.002809596061706543, "grad_norm": 0.28654733300209045, "loss": 0.4747131824493408, "time": 1.769330883026123, "epoch": 768, "memory": 36824, "step": 240171}
{"lr": 7.148500032721837e-06, "data_time": 0.002891230583190918, "grad_norm": 0.3171507060527802, "loss": 0.4744974225759506, "time": 1.8181209802627563, "epoch": 768, "memory": 36824, "step": 240271}
{"lr": 7.008266822338272e-06, "data_time": 0.0031284809112548826, "grad_norm": 0.2882899969816208, "loss": 0.47292754650115965, "time": 1.8252367496490478, "epoch": 768, "memory": 36824, "step": 240371}
{"lr": 6.8514677947451605e-06, "data_time": 0.10559239387512206, "grad_norm": 0.3196036219596863, "loss": 0.46949381530284884, "time": 1.8179990291595458, "epoch": 769, "memory": 36824, "step": 240484}
{"lr": 6.714180871228075e-06, "data_time": 0.002845001220703125, "grad_norm": 0.28387542366981505, "loss": 0.4733745366334915, "time": 1.8044479846954347, "epoch": 769, "memory": 36824, "step": 240584}
{"lr": 6.578277557141486e-06, "data_time": 0.003616952896118164, "grad_norm": 0.27803994715213776, "loss": 0.47631544768810274, "time": 1.751646065711975, "epoch": 769, "memory": 36824, "step": 240684}
{"lr": 6.42637221451639e-06, "data_time": 0.3820312738418579, "grad_norm": 0.2871440529823303, "loss": 0.4683077037334442, "time": 1.8282356977462768, "epoch": 770, "memory": 36824, "step": 240797}
{"lr": 6.293416775633207e-06, "data_time": 0.002623295783996582, "grad_norm": 0.27420033812522887, "loss": 0.47094531953334806, "time": 1.8223011016845703, "epoch": 770, "memory": 36824, "step": 240897}
{"lr": 6.1618456800563155e-06, "data_time": 0.003253316879272461, "grad_norm": 0.2944681018590927, "loss": 0.47154344618320465, "time": 1.7386207580566406, "epoch": 770, "memory": 36824, "step": 240997}
{"lr": 6.0148366180423974e-06, "data_time": 0.0029778242111206054, "grad_norm": 0.3037253364920616, "loss": 0.47083936631679535, "time": 1.8708529710769652, "epoch": 771, "memory": 36824, "step": 241110}
{"lr": 5.886214935634766e-06, "data_time": 0.0030465126037597656, "grad_norm": 0.2839520663022995, "loss": 0.4735999435186386, "time": 1.8393226385116577, "epoch": 771, "memory": 36824, "step": 241210}
{"lr": 5.758978306754471e-06, "data_time": 0.0031345844268798827, "grad_norm": 0.3044139355421066, "loss": 0.4695530354976654, "time": 1.9850582122802733, "epoch": 771, "memory": 36824, "step": 241310}
{"lr": 5.6168680373308845e-06, "data_time": 0.12927589416503907, "grad_norm": 0.2788354426622391, "loss": 0.47413574159145355, "time": 1.8184940099716187, "epoch": 772, "memory": 36824, "step": 241423}
{"lr": 5.492582309188484e-06, "data_time": 0.0032971858978271484, "grad_norm": 0.29852012991905214, "loss": 0.47455650866031646, "time": 1.8165515422821046, "epoch": 772, "memory": 36824, "step": 241523}
{"lr": 5.369682321127786e-06, "data_time": 0.0030542612075805664, "grad_norm": 0.29674655497074126, "loss": 0.4731240481138229, "time": 1.8813256978988648, "epoch": 772, "memory": 36824, "step": 241623}
{"lr": 5.232473272567051e-06, "data_time": 0.17992777824401857, "grad_norm": 0.28284635543823244, "loss": 0.47554286420345304, "time": 1.8428009033203125, "epoch": 773, "memory": 36824, "step": 241736}
{"lr": 5.112525622390035e-06, "data_time": 0.0027428865432739258, "grad_norm": 0.2874435007572174, "loss": 0.4735205054283142, "time": 1.7882308483123779, "epoch": 773, "memory": 36824, "step": 241836}
{"lr": 4.9939643751705204e-06, "data_time": 0.0035759925842285155, "grad_norm": 0.29753921926021576, "loss": 0.4747518926858902, "time": 2.1364824771881104, "epoch": 773, "memory": 36824, "step": 241936}
{"lr": 4.861658891996761e-06, "data_time": 0.4354416370391846, "grad_norm": 0.30880975127220156, "loss": 0.4755222022533417, "time": 1.8544873952865601, "epoch": 774, "memory": 36824, "step": 242049}
{"lr": 4.746051369359592e-06, "data_time": 0.002837681770324707, "grad_norm": 0.29975918829441073, "loss": 0.4782138645648956, "time": 2.0762195348739625, "epoch": 774, "memory": 36824, "step": 242149}
{"lr": 4.6318308888659655e-06, "data_time": 0.0035926342010498048, "grad_norm": 0.2867966890335083, "loss": 0.4697766602039337, "time": 1.8210721254348754, "epoch": 774, "memory": 36824, "step": 242249}
{"lr": 4.504431231814712e-06, "data_time": 0.07040040493011475, "grad_norm": 0.2838874086737633, "loss": 0.47486119270324706, "time": 1.8037853717803956, "epoch": 775, "memory": 36824, "step": 242362}
{"lr": 4.393165812130903e-06, "data_time": 0.0029104471206665037, "grad_norm": 0.28448804169893266, "loss": 0.47283701598644257, "time": 1.854196262359619, "epoch": 775, "memory": 36824, "step": 242462}
{"lr": 4.28328805007599e-06, "data_time": 0.002991652488708496, "grad_norm": 0.30327659398317336, "loss": 0.4700477659702301, "time": 1.9620927095413208, "epoch": 775, "memory": 36824, "step": 242562}
{"lr": 4.160796396055918e-06, "data_time": 0.34115617275238036, "grad_norm": 0.2883804738521576, "loss": 0.4733739882707596, "time": 1.7965171575546264, "epoch": 776, "memory": 36824, "step": 242675}
{"lr": 4.053874980544578e-06, "data_time": 0.0026405811309814452, "grad_norm": 0.30238552391529083, "loss": 0.4734924644231796, "time": 1.8130653858184815, "epoch": 776, "memory": 36824, "step": 242775}
{"lr": 3.948341814436228e-06, "data_time": 0.003512001037597656, "grad_norm": 0.2942912369966507, "loss": 0.4800530880689621, "time": 1.762648916244507, "epoch": 776, "memory": 36824, "step": 242875}
{"lr": 3.830760256491662e-06, "data_time": 0.13608484268188475, "grad_norm": 0.29346535801887513, "loss": 0.4718450129032135, "time": 1.7703240394592286, "epoch": 777, "memory": 36824, "step": 242988}
{"lr": 3.728184672144766e-06, "data_time": 0.002931547164916992, "grad_norm": 0.28242732435464857, "loss": 0.4710340738296509, "time": 1.7719414234161377, "epoch": 777, "memory": 36824, "step": 243088}
{"lr": 3.6269979052535746e-06, "data_time": 0.003691267967224121, "grad_norm": 0.3145637109875679, "loss": 0.4716640800237656, "time": 2.0452231645584105, "epoch": 777, "memory": 36824, "step": 243188}
{"lr": 3.514328452529008e-06, "data_time": 0.20861468315124512, "grad_norm": 0.2915444761514664, "loss": 0.47473427951335906, "time": 1.7877819061279296, "epoch": 778, "memory": 36824, "step": 243301}
{"lr": 3.4161004520802506e-06, "data_time": 0.002751278877258301, "grad_norm": 0.2984134316444397, "loss": 0.47060964405536654, "time": 1.8249218463897705, "epoch": 778, "memory": 36824, "step": 243401}
{"lr": 3.3192618134087607e-06, "data_time": 0.0036495208740234377, "grad_norm": 0.2988319009542465, "loss": 0.471898490190506, "time": 1.7603450536727905, "epoch": 778, "memory": 36824, "step": 243501}
{"lr": 3.2115063911143207e-06, "data_time": 0.3524779319763184, "grad_norm": 0.2969300389289856, "loss": 0.472418999671936, "time": 1.769507145881653, "epoch": 779, "memory": 36824, "step": 243614}
{"lr": 3.117627653009167e-06, "data_time": 0.002980947494506836, "grad_norm": 0.28484505265951154, "loss": 0.47235956192016604, "time": 1.7272953510284423, "epoch": 779, "memory": 36824, "step": 243714}
{"lr": 3.0251387972627273e-06, "data_time": 0.0038310527801513673, "grad_norm": 0.28182088285684587, "loss": 0.47089130282402036, "time": 2.109127473831177, "epoch": 779, "memory": 36824, "step": 243814}
{"lr": 2.922299246641304e-06, "data_time": 0.24447038173675537, "grad_norm": 0.28318225145339965, "loss": 0.47371888160705566, "time": 1.7865578413009644, "epoch": 780, "memory": 36824, "step": 243927}
{"lr": 2.832771375008317e-06, "data_time": 0.002547001838684082, "grad_norm": 0.2970747023820877, "loss": 0.47262718975543977, "time": 1.7590466737747192, "epoch": 780, "memory": 36824, "step": 244027}
{"lr": 2.744633882566219e-06, "data_time": 0.0037097692489624023, "grad_norm": 0.2943828493356705, "loss": 0.4753462433815002, "time": 1.8098784685134888, "epoch": 780, "memory": 36824, "step": 244127}
{"lr": 2.646711960861849e-06, "data_time": 0.049921059608459474, "grad_norm": 0.3090414792299271, "loss": 0.4694674849510193, "time": 1.89349262714386, "epoch": 781, "memory": 36824, "step": 244240}
{"lr": 2.5615364854854215e-06, "data_time": 0.0027799129486083983, "grad_norm": 0.29293749630451205, "loss": 0.47122803032398225, "time": 1.7719377279281616, "epoch": 781, "memory": 36824, "step": 244340}
{"lr": 2.477751862374174e-06, "data_time": 0.003268861770629883, "grad_norm": 0.2972269386053085, "loss": 0.47463229596614837, "time": 1.990189504623413, "epoch": 781, "memory": 36824, "step": 244440}
{"lr": 2.3847492428023858e-06, "data_time": 0.08054168224334717, "grad_norm": 0.2935088753700256, "loss": 0.4719021737575531, "time": 2.1081399440765383, "epoch": 782, "memory": 36824, "step": 244553}
{"lr": 2.3039276190965245e-06, "data_time": 0.003100132942199707, "grad_norm": 0.2890795350074768, "loss": 0.4740984052419662, "time": 1.7916623830795289, "epoch": 782, "memory": 36824, "step": 244653}
{"lr": 2.224497296964173e-06, "data_time": 0.003026413917541504, "grad_norm": 0.30626955777406695, "loss": 0.4737300932407379, "time": 1.8010604619979858, "epoch": 782, "memory": 36824, "step": 244753}
{"lr": 2.1364155686831004e-06, "data_time": 0.002896022796630859, "grad_norm": 0.29220141768455504, "loss": 0.47272787392139437, "time": 2.2735494136810304, "epoch": 783, "memory": 36824, "step": 244866}
{"lr": 2.0599491776661316e-06, "data_time": 0.0028569936752319337, "grad_norm": 0.28745187073946, "loss": 0.4727467864751816, "time": 1.827139663696289, "epoch": 783, "memory": 36824, "step": 244966}
{"lr": 1.98487451375774e-06, "data_time": 0.0030784130096435545, "grad_norm": 0.30117145776748655, "loss": 0.47239499986171724, "time": 1.7887661933898926, "epoch": 783, "memory": 36824, "step": 245066}
{"lr": 1.901715181841228e-06, "data_time": 0.03494703769683838, "grad_norm": Infinity, "loss": 0.4796008378267288, "time": 1.8806983470916747, "epoch": 784, "memory": 36824, "step": 245179}
{"lr": 1.8296053301128662e-06, "data_time": 0.0036791086196899412, "grad_norm": 0.2950225055217743, "loss": 0.47276903688907623, "time": 1.7744229078292846, "epoch": 784, "memory": 36824, "step": 245279}
{"lr": 1.7588876072473574e-06, "data_time": 0.0030446290969848634, "grad_norm": 0.2809180185198784, "loss": 0.4712141126394272, "time": 1.8034065008163451, "epoch": 784, "memory": 36824, "step": 245379}
{"lr": 1.6806520926587495e-06, "data_time": 0.0028450727462768556, "grad_norm": 0.29226444512605665, "loss": 0.47404723763465884, "time": 1.8625734806060792, "epoch": 785, "memory": 36824, "step": 245492}
{"lr": 1.6129000123774596e-06, "data_time": 0.003006291389465332, "grad_norm": 0.3167698621749878, "loss": 0.47520507872104645, "time": 1.788002848625183, "epoch": 785, "memory": 36824, "step": 245592}
{"lr": 1.5465404389255714e-06, "data_time": 0.0033137321472167967, "grad_norm": 0.2954094111919403, "loss": 0.4757382094860077, "time": 1.8834263324737548, "epoch": 785, "memory": 36824, "step": 245692}
{"lr": 1.4732300784940074e-06, "data_time": 0.2641695737838745, "grad_norm": 0.2850972577929497, "loss": 0.4739251732826233, "time": 1.8396976470947266, "epoch": 786, "memory": 36824, "step": 245805}
{"lr": 1.4098369273559504e-06, "data_time": 0.003419041633605957, "grad_norm": 0.313426274061203, "loss": 0.474708291888237, "time": 1.8305728912353516, "epoch": 786, "memory": 36824, "step": 245905}
{"lr": 1.34783663721963e-06, "data_time": 0.0037906408309936524, "grad_norm": 0.282975734770298, "loss": 0.4675292432308197, "time": 1.7465278387069703, "epoch": 786, "memory": 36824, "step": 246005}
{"lr": 1.2794526836169402e-06, "data_time": 0.05561838150024414, "grad_norm": 0.2914420336484909, "loss": 0.47260375916957853, "time": 1.7894349336624145, "epoch": 787, "memory": 36824, "step": 246118}
{"lr": 1.22041954483608e-06, "data_time": 0.0027540445327758787, "grad_norm": 0.27793107330799105, "loss": 0.471847328543663, "time": 1.7366075038909912, "epoch": 787, "memory": 36824, "step": 246218}
{"lr": 1.1627795974293075e-06, "data_time": 0.004465150833129883, "grad_norm": 0.31103689670562745, "loss": 0.4698329359292984, "time": 1.7803978204727173, "epoch": 787, "memory": 36824, "step": 246318}
{"lr": 1.09932321914853e-06, "data_time": 0.2542020082473755, "grad_norm": 0.2821023792028427, "loss": 0.47286557853221894, "time": 1.8307596445083618, "epoch": 788, "memory": 36824, "step": 246431}
{"lr": 1.0446511014383329e-06, "data_time": 0.003003978729248047, "grad_norm": 0.3041904091835022, "loss": 0.47678509056568147, "time": 1.7898677110671997, "epoch": 788, "memory": 36824, "step": 246531}
{"lr": 9.913724816687312e-07, "data_time": 0.003190445899963379, "grad_norm": 0.29930990636348725, "loss": 0.4698614001274109, "time": 1.7754799604415894, "epoch": 788, "memory": 36824, "step": 246631}
{"lr": 9.328447630043852e-07, "data_time": 0.09336445331573487, "grad_norm": 0.2894297271966934, "loss": 0.47295344769954684, "time": 1.7812556028366089, "epoch": 789, "memory": 36824, "step": 246744}
{"lr": 8.825346005604205e-07, "data_time": 0.0027363061904907226, "grad_norm": 0.29580338299274445, "loss": 0.47188356816768645, "time": 1.8303630352020264, "epoch": 789, "memory": 36824, "step": 246844}
{"lr": 8.336182188127341e-07, "data_time": 0.003720808029174805, "grad_norm": 0.30178194046020507, "loss": 0.46814084947109225, "time": 1.8427101373672485, "epoch": 789, "memory": 36824, "step": 246944}
{"lr": 7.800201598419965e-07, "data_time": 0.16548304557800292, "grad_norm": 0.29238620698451995, "loss": 0.47302051782608034, "time": 1.7890653133392334, "epoch": 790, "memory": 36824, "step": 247057}
{"lr": 7.340728123262027e-07, "data_time": 0.002835869789123535, "grad_norm": 0.28901807963848114, "loss": 0.4705797225236893, "time": 1.7943002223968505, "epoch": 790, "memory": 36824, "step": 247157}
{"lr": 6.895195044462244e-07, "data_time": 0.00313112735748291, "grad_norm": 0.2830961123108864, "loss": 0.47056333124637606, "time": 1.7886642694473267, "epoch": 790, "memory": 36824, "step": 247257}
{"lr": 6.408520210120536e-07, "data_time": 0.20015270709991456, "grad_norm": 0.28322104662656783, "loss": 0.4740139991044998, "time": 1.814875888824463, "epoch": 791, "memory": 36824, "step": 247370}
{"lr": 5.992682735378277e-07, "data_time": 0.002795290946960449, "grad_norm": 0.2798053503036499, "loss": 0.47357662320137023, "time": 1.8380748271942138, "epoch": 791, "memory": 36824, "step": 247470}
{"lr": 5.59078800818808e-07, "data_time": 0.0030806779861450194, "grad_norm": 0.30924543738365173, "loss": 0.47429771423339845, "time": 1.8423157453536987, "epoch": 791, "memory": 36824, "step": 247570}
{"lr": 5.153427245141307e-07, "data_time": 0.01972684860229492, "grad_norm": 0.30094352662563323, "loss": 0.4724219083786011, "time": 1.8437024116516114, "epoch": 792, "memory": 36824, "step": 247683}
{"lr": 4.781232876330897e-07, "data_time": 0.00269010066986084, "grad_norm": 0.2968481659889221, "loss": 0.47798529267311096, "time": 1.848382544517517, "epoch": 792, "memory": 36824, "step": 247783}
{"lr": 4.4229833680205924e-07, "data_time": 0.0029532909393310547, "grad_norm": 0.29206993281841276, "loss": 0.47408345639705657, "time": 1.791518235206604, "epoch": 792, "memory": 36824, "step": 247883}
{"lr": 4.0349441495591807e-07, "data_time": 0.002669548988342285, "grad_norm": 0.28955630362033846, "loss": 0.47232740223407743, "time": 1.9171000003814698, "epoch": 793, "memory": 36824, "step": 247996}
{"lr": 3.706399246453779e-07, "data_time": 0.002698373794555664, "grad_norm": 0.2849667251110077, "loss": 0.4715410202741623, "time": 1.6592010736465455, "epoch": 793, "memory": 36824, "step": 248096}
{"lr": 3.3918010785213416e-07, "data_time": 0.003117942810058594, "grad_norm": 0.28295388221740725, "loss": 0.4686751365661621, "time": 1.7431130886077881, "epoch": 793, "memory": 36824, "step": 248196}
{"lr": 3.053090035162764e-07, "data_time": 0.00276036262512207, "grad_norm": 0.28774687349796296, "loss": 0.47569589614868163, "time": 1.7826818943023681, "epoch": 794, "memory": 36824, "step": 248309}
{"lr": 2.768200211689468e-07, "data_time": 0.0029119253158569336, "grad_norm": 0.30004214197397233, "loss": 0.4704682767391205, "time": 1.865140438079834, "epoch": 794, "memory": 36824, "step": 248409}
{"lr": 2.497258759747746e-07, "data_time": 0.003494596481323242, "grad_norm": 0.29508817195892334, "loss": 0.4721840262413025, "time": 1.775754737854004, "epoch": 794, "memory": 36824, "step": 248509}
{"lr": 2.207881679130818e-07, "data_time": 0.48310692310333253, "grad_norm": 0.29514507949352264, "loss": 0.47407753169536593, "time": 1.8001207113265991, "epoch": 795, "memory": 36824, "step": 248622}
{"lr": 1.9666518032694332e-07, "data_time": 0.004149436950683594, "grad_norm": 0.2783327117562294, "loss": 0.4715307176113129, "time": 1.8368484497070312, "epoch": 795, "memory": 36824, "step": 248722}
{"lr": 1.7393716969617578e-07, "data_time": 0.0038175582885742188, "grad_norm": 0.2856064185500145, "loss": 0.4660600572824478, "time": 2.0940324544906614, "epoch": 795, "memory": 36824, "step": 248822}
{"lr": 1.4993335237418274e-07, "data_time": 0.17039618492126465, "grad_norm": 0.2837155252695084, "loss": 0.47245603501796724, "time": 1.9728456020355225, "epoch": 796, "memory": 36824, "step": 248935}
{"lr": 1.3017677174466796e-07, "data_time": 0.002663135528564453, "grad_norm": 0.28158734142780306, "loss": 0.474651637673378, "time": 1.7781845092773438, "epoch": 796, "memory": 36824, "step": 249035}
{"lr": 1.1181528403616157e-07, "data_time": 0.0030424118041992186, "grad_norm": 0.2916103512048721, "loss": 0.46943187415599824, "time": 1.7585854530334473, "epoch": 796, "memory": 36824, "step": 249135}
{"lr": 9.274576761288648e-08, "data_time": 0.27258739471435545, "grad_norm": 0.2741494089365005, "loss": 0.47550291419029234, "time": 1.8455915689468383, "epoch": 797, "memory": 36824, "step": 249248}
{"lr": 7.73559315253333e-08, "data_time": 0.0026217222213745115, "grad_norm": 0.28393522948026656, "loss": 0.47449549436569216, "time": 1.8361478567123413, "epoch": 797, "memory": 36824, "step": 249348}
{"lr": 6.336128048633833e-08, "data_time": 0.0030402421951293947, "grad_norm": Infinity, "loss": 0.47568243741989136, "time": 1.8129401683807373, "epoch": 797, "memory": 36824, "step": 249448}
{"lr": 4.922639080726497e-08, "data_time": 0.09003753662109375, "grad_norm": 0.28634020537137983, "loss": 0.4704832911491394, "time": 1.8564341068267822, "epoch": 798, "memory": 36824, "step": 249561}
{"lr": 3.820356223149677e-08, "data_time": 0.003003835678100586, "grad_norm": 0.2796948775649071, "loss": 0.4747228264808655, "time": 1.906168031692505, "epoch": 798, "memory": 36824, "step": 249661}
{"lr": 2.857598699197347e-08, "data_time": 0.0038614273071289062, "grad_norm": 0.2690067902207375, "loss": 0.47527853548526766, "time": 1.8461170196533203, "epoch": 798, "memory": 36824, "step": 249761}
{"lr": 1.9375965583278613e-08, "data_time": 0.0026134967803955076, "grad_norm": 0.30744518637657164, "loss": 0.4694148451089859, "time": 1.8951337814331055, "epoch": 799, "memory": 36824, "step": 249874}
{"lr": 1.272033286916454e-08, "data_time": 0.002799677848815918, "grad_norm": 0.3130819097161293, "loss": 0.4755672961473465, "time": 1.8199031114578248, "epoch": 799, "memory": 36824, "step": 249974}
{"lr": 7.459997937697449e-09, "data_time": 0.003088498115539551, "grad_norm": 0.2835987567901611, "loss": 0.4778809010982513, "time": 1.8310145378112792, "epoch": 799, "memory": 36824, "step": 250074}
{"lr": 3.1950020024317233e-09, "data_time": 0.0028713703155517577, "grad_norm": 0.3063159018754959, "loss": 0.4731050133705139, "time": 1.7856573104858398, "epoch": 800, "memory": 36824, "step": 250187}
{"lr": 9.066788763334123e-10, "data_time": 0.002974843978881836, "grad_norm": 0.29064589738845825, "loss": 0.4720408976078033, "time": 1.8750527381896973, "epoch": 800, "memory": 36824, "step": 250287}
{"lr": 1.3674137644414455e-11, "data_time": 0.0037240028381347657, "grad_norm": 0.2799965858459473, "loss": 0.4721928179264069, "time": 1.7774444580078126, "epoch": 800, "memory": 36824, "step": 250387}
