{"lr": 0.0015867115810019865, "data_time": 0.0018190383911132813, "grad_norm": 0.33633670806884763, "loss": 0.4771943598985672, "time": 1.1878127574920654, "epoch": 66, "memory": 35394, "step": 20445}
{"lr": 0.0015865240782157266, "data_time": 0.0018893003463745118, "grad_norm": 0.32672303467988967, "loss": 0.4781449258327484, "time": 1.1844202280044556, "epoch": 66, "memory": 35394, "step": 20545}
{"lr": 0.001586335273061135, "data_time": 0.0024997472763061525, "grad_norm": 0.29738105535507203, "loss": 0.47435301542282104, "time": 1.1428546190261841, "epoch": 66, "memory": 35394, "step": 20645}
{"lr": 0.0015861203562939068, "data_time": 0.003079843521118164, "grad_norm": 0.2560360670089722, "loss": 0.48140924870967866, "time": 1.228310227394104, "epoch": 67, "memory": 35394, "step": 20758}
{"lr": 0.001585928778139624, "data_time": 0.001997208595275879, "grad_norm": 0.28427727669477465, "loss": 0.48061569333076476, "time": 1.1434895992279053, "epoch": 67, "memory": 35394, "step": 20858}
{"lr": 0.0015857358986027387, "data_time": 0.002923750877380371, "grad_norm": 0.32307083904743195, "loss": 0.48147622048854827, "time": 1.1400766849517823, "epoch": 67, "memory": 35394, "step": 20958}
{"lr": 0.0015855163789782664, "data_time": 0.0018380641937255859, "grad_norm": 0.3068059369921684, "loss": 0.4817797362804413, "time": 1.2017001152038573, "epoch": 68, "memory": 35394, "step": 21071}
{"lr": 0.0015853207285637812, "data_time": 0.0019465923309326173, "grad_norm": 0.2938296303153038, "loss": 0.4810342162847519, "time": 1.163308358192444, "epoch": 68, "memory": 35394, "step": 21171}
{"lr": 0.0015851237777735332, "data_time": 0.0032125473022460937, "grad_norm": 0.28601300418376924, "loss": 0.48834707736968996, "time": 1.1487160682678224, "epoch": 68, "memory": 35394, "step": 21271}
{"lr": 0.0015848996588529113, "data_time": 0.1718907356262207, "grad_norm": 0.32265938222408297, "loss": 0.48301646411418914, "time": 1.4652145624160766, "epoch": 69, "memory": 35394, "step": 21384}
{"lr": 0.0015846999393521063, "data_time": 0.0027937889099121094, "grad_norm": 0.310563799738884, "loss": 0.4800885409116745, "time": 1.4578091859817506, "epoch": 69, "memory": 35394, "step": 21484}
{"lr": 0.0015844989205034755, "data_time": 0.004140996932983398, "grad_norm": 0.33326012194156646, "loss": 0.472564360499382, "time": 1.4422926664352418, "epoch": 69, "memory": 35394, "step": 21584}
{"lr": 0.0015842702059224046, "data_time": 0.03520631790161133, "grad_norm": 0.3134883850812912, "loss": 0.48510353863239286, "time": 1.417341375350952, "epoch": 70, "memory": 35394, "step": 21697}
{"lr": 0.0015840664205751692, "data_time": 0.0030492544174194336, "grad_norm": 0.35180778354406356, "loss": 0.4804906785488129, "time": 1.4160962104797363, "epoch": 70, "memory": 35394, "step": 21797}
{"lr": 0.001583861336929125, "data_time": 0.0034549951553344725, "grad_norm": 0.3378278881311417, "loss": 0.47636950612068174, "time": 1.3687460899353028, "epoch": 70, "memory": 35394, "step": 21897}
{"lr": 0.001583628030397864, "data_time": 0.222237229347229, "grad_norm": 0.36526448726654054, "loss": 0.4802695870399475, "time": 1.4163522720336914, "epoch": 71, "memory": 35394, "step": 22010}
{"lr": 0.001583420182510047, "data_time": 0.002308201789855957, "grad_norm": 0.3435163199901581, "loss": 0.47986389994621276, "time": 1.4363486289978027, "epoch": 71, "memory": 35394, "step": 22110}
{"lr": 0.0015832110373934958, "data_time": 0.0038530588150024413, "grad_norm": 0.3110512375831604, "loss": 0.47675970792770384, "time": 1.482214593887329, "epoch": 71, "memory": 35394, "step": 22210}
{"lr": 0.0015829731426967874, "data_time": 0.038888287544250486, "grad_norm": 0.3582803592085838, "loss": 0.48503599762916566, "time": 1.4522196054458618, "epoch": 72, "memory": 35394, "step": 22323}
{"lr": 0.0015827612356401405, "data_time": 0.0022464275360107424, "grad_norm": 0.320953032374382, "loss": 0.4784739017486572, "time": 1.508939552307129, "epoch": 72, "memory": 35394, "step": 22423}
{"lr": 0.001582548032445879, "data_time": 0.002953839302062988, "grad_norm": 0.2506822973489761, "loss": 0.4745581328868866, "time": 1.4596158266067505, "epoch": 72, "memory": 35394, "step": 22523}
{"lr": 0.0015823055534429017, "data_time": 0.0027666807174682615, "grad_norm": 0.3261309310793877, "loss": 0.4830485701560974, "time": 1.399160885810852, "epoch": 73, "memory": 35394, "step": 22636}
{"lr": 0.0015820895906550273, "data_time": 0.0028375864028930666, "grad_norm": 0.3351388335227966, "loss": 0.48075917065143586, "time": 1.571267056465149, "epoch": 73, "memory": 35394, "step": 22736}
{"lr": 0.0015818723328416802, "data_time": 0.0030637025833129884, "grad_norm": 0.313473716378212, "loss": 0.4784325689077377, "time": 1.4521634101867675, "epoch": 73, "memory": 35394, "step": 22836}
{"lr": 0.00158162527346598, "data_time": 0.2530268669128418, "grad_norm": 0.2766367107629776, "loss": 0.4876763582229614, "time": 1.4399670362472534, "epoch": 74, "memory": 35394, "step": 22949}
{"lr": 0.001581405258450268, "data_time": 0.0026723146438598633, "grad_norm": 0.31157643646001815, "loss": 0.4765566438436508, "time": 1.4472007513046266, "epoch": 74, "memory": 35394, "step": 23049}
{"lr": 0.0015811839495422375, "data_time": 0.0038507938385009765, "grad_norm": 0.3464258223772049, "loss": 0.48570317327976226, "time": 1.4556889057159423, "epoch": 74, "memory": 35394, "step": 23149}
{"lr": 0.001580932313801662, "data_time": 0.002625846862792969, "grad_norm": 0.3892317354679108, "loss": 0.4777612954378128, "time": 8.194641518592835, "epoch": 75, "memory": 35394, "step": 23262}
{"lr": 0.001580708250127245, "data_time": 0.0020206212997436524, "grad_norm": 0.3155887067317963, "loss": 0.48215403854846955, "time": 1.2871931314468383, "epoch": 75, "memory": 35394, "step": 23362}
{"lr": 0.0015804828937146524, "data_time": 0.0033728837966918944, "grad_norm": 0.30931887924671175, "loss": 0.482264107465744, "time": 1.7295164823532105, "epoch": 75, "memory": 35394, "step": 23462}
{"lr": 0.0015802266856912886, "data_time": 0.11086854934692383, "grad_norm": 0.3066247582435608, "loss": 0.4789242625236511, "time": 1.4188698053359985, "epoch": 76, "memory": 35394, "step": 23575}
{"lr": 0.0015799985769929714, "data_time": 0.0029799699783325194, "grad_norm": 0.3123544409871101, "loss": 0.4852247893810272, "time": 1.5040985584259032, "epoch": 76, "memory": 35394, "step": 23675}
{"lr": 0.001579769176731594, "data_time": 0.004048061370849609, "grad_norm": 0.31240170896053315, "loss": 0.4760854929685593, "time": 1.4239238262176515, "epoch": 76, "memory": 35394, "step": 23775}
{"lr": 0.0015795084005817035, "data_time": 0.3341118812561035, "grad_norm": 0.2882686048746109, "loss": 0.48059429228305817, "time": 1.4456581354141236, "epoch": 77, "memory": 35394, "step": 23888}
{"lr": 0.001579276250559913, "data_time": 0.0024668216705322266, "grad_norm": 0.31298870742321017, "loss": 0.48385761976242064, "time": 1.414208221435547, "epoch": 77, "memory": 35394, "step": 23988}
{"lr": 0.001579042810171129, "data_time": 0.002199077606201172, "grad_norm": 0.24995141178369523, "loss": 0.4708224028348923, "time": 1.413093900680542, "epoch": 77, "memory": 35394, "step": 24088}
{"lr": 0.001578777470125074, "data_time": 0.18457212448120117, "grad_norm": 0.291524176299572, "loss": 0.47947638034820556, "time": 1.4755991220474243, "epoch": 78, "memory": 35394, "step": 24201}
{"lr": 0.0015785412825457975, "data_time": 0.0018100261688232422, "grad_norm": 0.30591356605291364, "loss": 0.48005998134613037, "time": 1.4559341192245483, "epoch": 78, "memory": 35394, "step": 24301}
{"lr": 0.0015783038058165247, "data_time": 0.0025274038314819338, "grad_norm": 0.33546785563230513, "loss": 0.48248693346977234, "time": 1.4009755373001098, "epoch": 78, "memory": 35394, "step": 24401}
{"lr": 0.001578033906178709, "data_time": 0.18941004276275636, "grad_norm": 0.38854736387729644, "loss": 0.483172270655632, "time": 1.4054543018341064, "epoch": 79, "memory": 35394, "step": 24514}
{"lr": 0.001577793684873428, "data_time": 0.001753544807434082, "grad_norm": 0.3002507209777832, "loss": 0.47722973823547366, "time": 1.4152748584747314, "epoch": 79, "memory": 35394, "step": 24614}
{"lr": 0.0015775521756560642, "data_time": 0.0025355339050292967, "grad_norm": 0.31785145998001096, "loss": 0.47591774463653563, "time": 1.443723773956299, "epoch": 79, "memory": 35394, "step": 24714}
{"lr": 0.0015772777208048526, "data_time": 0.4567164421081543, "grad_norm": 0.32986190617084504, "loss": 0.4851544678211212, "time": 1.4485512495040893, "epoch": 80, "memory": 35394, "step": 24827}
{"lr": 0.0015770334696704894, "data_time": 0.03344151973724365, "grad_norm": 0.34111949503421785, "loss": 0.4847651094198227, "time": 1.5854212760925293, "epoch": 80, "memory": 35394, "step": 24927}
{"lr": 0.001576787931882847, "data_time": 0.0021339178085327147, "grad_norm": 0.26753145456314087, "loss": 0.4793927162885666, "time": 1.399675178527832, "epoch": 80, "memory": 35394, "step": 25027}
{"lr": 0.0015765089262705064, "data_time": 0.2026871919631958, "grad_norm": 0.3335011899471283, "loss": 0.4811219722032547, "time": 1.425359320640564, "epoch": 81, "memory": 35394, "step": 25140}
{"lr": 0.0015762606492693522, "data_time": 0.0023642539978027343, "grad_norm": 0.3133604019880295, "loss": 0.47606076002120973, "time": 1.439950704574585, "epoch": 81, "memory": 35394, "step": 25240}
{"lr": 0.0015760110868945963, "data_time": 0.0043716907501220705, "grad_norm": 0.3290912300348282, "loss": 0.4823719680309296, "time": 1.3926044702529907, "epoch": 81, "memory": 35394, "step": 25340}
{"lr": 0.0015757275350472106, "data_time": 0.24756698608398436, "grad_norm": 0.348510867357254, "loss": 0.47052437365055083, "time": 1.4593315362930297, "epoch": 82, "memory": 35394, "step": 25453}
{"lr": 0.0015754752362068668, "data_time": 0.0019181013107299806, "grad_norm": 0.27919830828905107, "loss": 0.4761332839727402, "time": 1.4169172286987304, "epoch": 82, "memory": 35394, "step": 25553}
{"lr": 0.0015752216532934507, "data_time": 0.003030705451965332, "grad_norm": 0.3524462252855301, "loss": 0.47510929107666017, "time": 1.4194627761840821, "epoch": 82, "memory": 35394, "step": 25653}
{"lr": 0.0015749335598108604, "data_time": 0.18993735313415527, "grad_norm": 0.30895575881004333, "loss": 0.4899993896484375, "time": 1.4627209186553956, "epoch": 83, "memory": 35394, "step": 25766}
{"lr": 0.0015746772432241715, "data_time": 0.002026820182800293, "grad_norm": 0.37514507621526716, "loss": 0.4869059532880783, "time": 1.9475071907043457, "epoch": 83, "memory": 35394, "step": 25866}
{"lr": 0.0015744196438857692, "data_time": 0.004161286354064942, "grad_norm": 0.29849001169204714, "loss": 0.47700919210910797, "time": 2.2146368980407716, "epoch": 83, "memory": 35394, "step": 25966}
{"lr": 0.001574127013441483, "data_time": 0.2130753517150879, "grad_norm": 0.2947077140212059, "loss": 0.47484877407550813, "time": 1.9380646228790284, "epoch": 84, "memory": 35394, "step": 26079}
{"lr": 0.0015738666832664692, "data_time": 0.0018954277038574219, "grad_norm": 0.2860597804188728, "loss": 0.4752158522605896, "time": 2.1415857553482054, "epoch": 84, "memory": 35394, "step": 26179}
{"lr": 0.0015736050716819102, "data_time": 0.002904963493347168, "grad_norm": 0.3383168041706085, "loss": 0.4777237236499786, "time": 2.24678795337677, "epoch": 84, "memory": 35394, "step": 26279}
{"lr": 0.001573307909023042, "data_time": 0.7199402332305909, "grad_norm": 0.30020717084407805, "loss": 0.47966374158859254, "time": 2.1592143535614015, "epoch": 85, "memory": 35394, "step": 26392}
{"lr": 0.0015730435694828316, "data_time": 0.0017989635467529296, "grad_norm": 0.3394935682415962, "loss": 0.47331240177154543, "time": 2.329042887687683, "epoch": 85, "memory": 35394, "step": 26492}
{"lr": 0.0015727779498960327, "data_time": 0.0022540569305419924, "grad_norm": 0.28029119223356247, "loss": 0.4718169867992401, "time": 2.1046238422393797, "epoch": 85, "memory": 35394, "step": 26592}
{"lr": 0.0015724762598432168, "data_time": 0.8193646192550659, "grad_norm": 0.2915079087018967, "loss": 0.4772008776664734, "time": 2.1969258785247803, "epoch": 86, "memory": 35394, "step": 26705}
{"lr": 0.0015722079152259844, "data_time": 0.0020337581634521486, "grad_norm": 0.3142016500234604, "loss": 0.47785256505012513, "time": 2.087424612045288, "epoch": 86, "memory": 35394, "step": 26805}
{"lr": 0.0015719382919458823, "data_time": 0.002662396430969238, "grad_norm": 0.33739967197179793, "loss": 0.4823438376188278, "time": 2.125537657737732, "epoch": 86, "memory": 35394, "step": 26905}
{"lr": 0.0015716320793931966, "data_time": 0.5024789094924926, "grad_norm": 0.2995141625404358, "loss": 0.48176923394203186, "time": 2.060988259315491, "epoch": 87, "memory": 35394, "step": 27018}
{"lr": 0.0015713597340520833, "data_time": 0.0020597219467163087, "grad_norm": 0.30529158711433413, "loss": 0.4751976728439331, "time": 1.9923400163650513, "epoch": 87, "memory": 35394, "step": 27118}
{"lr": 0.0015710861114525644, "data_time": 0.002542448043823242, "grad_norm": 0.31559184193611145, "loss": 0.4791397720575333, "time": 2.2410518407821653, "epoch": 87, "memory": 35394, "step": 27218}
{"lr": 0.0015707753813674517, "data_time": 0.9195078611373901, "grad_norm": 0.34081085622310636, "loss": 0.4788160502910614, "time": 2.1152315616607664, "epoch": 88, "memory": 35394, "step": 27331}
{"lr": 0.0015704990397205007, "data_time": 0.5090538024902344, "grad_norm": 0.31364541947841645, "loss": 0.4810910731554031, "time": 2.149007868766785, "epoch": 88, "memory": 35394, "step": 27431}
{"lr": 0.0015702214222403277, "data_time": 0.0026216745376586915, "grad_norm": 0.25106043070554734, "loss": 0.47947663366794585, "time": 2.3554714202880858, "epoch": 88, "memory": 35394, "step": 27531}
{"lr": 0.0015699061796635176, "data_time": 1.046254825592041, "grad_norm": 0.29452967792749407, "loss": 0.47744722962379454, "time": 2.475049114227295, "epoch": 89, "memory": 35394, "step": 27644}
{"lr": 0.001569625846193598, "data_time": 0.10507915019989014, "grad_norm": 0.316638146340847, "loss": 0.4731054902076721, "time": 2.4148926019668577, "epoch": 89, "memory": 35394, "step": 27744}
{"lr": 0.001569344238336337, "data_time": 0.0027866601943969727, "grad_norm": 0.3534388139843941, "loss": 0.4847790449857712, "time": 1.9761624813079834, "epoch": 89, "memory": 35394, "step": 27844}
{"lr": 0.0015690244883817587, "data_time": 0.9104416847229004, "grad_norm": 0.31112461239099504, "loss": 0.4772141665220261, "time": 2.1609519481658936, "epoch": 90, "memory": 35394, "step": 27957}
{"lr": 0.0015687401676365003, "data_time": 0.0021248579025268553, "grad_norm": 0.3587131053209305, "loss": 0.4760983049869537, "time": 2.3717471837997435, "epoch": 90, "memory": 35394, "step": 28057}
{"lr": 0.0015684545739704514, "data_time": 0.002273821830749512, "grad_norm": 0.29042237550020217, "loss": 0.482111394405365, "time": 1.7328290462493896, "epoch": 90, "memory": 35394, "step": 28157}
{"lr": 0.0015681303218251558, "data_time": 0.8506494045257569, "grad_norm": 0.3181259348988533, "loss": 0.4747504830360413, "time": 2.2136202573776247, "epoch": 91, "memory": 35394, "step": 28270}
{"lr": 0.0015678420184168673, "data_time": 0.227431583404541, "grad_norm": 0.3547044634819031, "loss": 0.48234164118766787, "time": 2.1882156848907472, "epoch": 91, "memory": 35394, "step": 28370}
{"lr": 0.001567552443574993, "data_time": 0.0031899929046630858, "grad_norm": 0.3005628168582916, "loss": 0.4760835587978363, "time": 2.267583465576172, "epoch": 91, "memory": 35394, "step": 28470}
{"lr": 0.0015672236944990641, "data_time": 0.954954195022583, "grad_norm": 0.36740359514951704, "loss": 0.4801318168640137, "time": 2.537081551551819, "epoch": 92, "memory": 35394, "step": 28583}
{"lr": 0.0015669314131046648, "data_time": 0.32531232833862306, "grad_norm": 0.31680052876472475, "loss": 0.48468499183654784, "time": 2.477991509437561, "epoch": 92, "memory": 35394, "step": 28683}
{"lr": 0.0015666378617845086, "data_time": 0.003254413604736328, "grad_norm": 0.2866571843624115, "loss": 0.4801236569881439, "time": 2.508101201057434, "epoch": 92, "memory": 35394, "step": 28783}
{"lr": 0.0015663046211109844, "data_time": 1.0924968004226685, "grad_norm": 0.35988081991672516, "loss": 0.48262550830841067, "time": 3.048290324211121, "epoch": 93, "memory": 35394, "step": 28896}
{"lr": 0.0015660083664719205, "data_time": 0.002443742752075195, "grad_norm": 0.2954444959759712, "loss": 0.47745168805122373, "time": 2.4516408920288084, "epoch": 93, "memory": 35394, "step": 28996}
{"lr": 0.001565710843435531, "data_time": 0.002930307388305664, "grad_norm": 0.27754895985126493, "loss": 0.47374234795570375, "time": 2.5391594886779787, "epoch": 93, "memory": 35394, "step": 29096}
{"lr": 0.0015653731165703097, "data_time": 0.7375475168228149, "grad_norm": 0.3163162708282471, "loss": 0.4788618505001068, "time": 2.1727972507476805, "epoch": 94, "memory": 35394, "step": 29209}
{"lr": 0.0015650728934924884, "data_time": 0.041879892349243164, "grad_norm": 0.35028460174798964, "loss": 0.4722868144512177, "time": 2.3334325313568116, "epoch": 94, "memory": 35394, "step": 29309}
{"lr": 0.0015647714035663454, "data_time": 0.002885890007019043, "grad_norm": 0.31459456086158755, "loss": 0.4795906484127045, "time": 2.2788072347640993, "epoch": 94, "memory": 35394, "step": 29409}
{"lr": 0.0015644291959881023, "data_time": 0.1617257833480835, "grad_norm": 0.32484031915664674, "loss": 0.48125424385070803, "time": 1.4856204271316529, "epoch": 95, "memory": 35394, "step": 29522}
{"lr": 0.0015641250093418053, "data_time": 0.0018291234970092773, "grad_norm": 0.30954408198595046, "loss": 0.47412261068820954, "time": 1.4849161386489869, "epoch": 95, "memory": 35394, "step": 29622}
{"lr": 0.0015638195574167423, "data_time": 0.0026758432388305662, "grad_norm": 0.3256224423646927, "loss": 0.47492454051971433, "time": 1.5222203731536865, "epoch": 95, "memory": 35394, "step": 29722}
{"lr": 0.0015634728746768407, "data_time": 0.2995991945266724, "grad_norm": 0.3052894562482834, "loss": 0.4830619305372238, "time": 1.5591410875320435, "epoch": 96, "memory": 35394, "step": 29835}
{"lr": 0.0015631647293966437, "data_time": 0.02089996337890625, "grad_norm": 0.3282967507839203, "loss": 0.474870091676712, "time": 1.5260091066360473, "epoch": 96, "memory": 35394, "step": 29935}
{"lr": 0.0015628553204277667, "data_time": 0.002770543098449707, "grad_norm": 0.2951935052871704, "loss": 0.4823440253734589, "time": 1.5552218198776244, "epoch": 96, "memory": 35394, "step": 30035}
{"lr": 0.001562504168150165, "data_time": 0.33474709987640383, "grad_norm": 0.33885401487350464, "loss": 0.4731862962245941, "time": 1.535073947906494, "epoch": 97, "memory": 35394, "step": 30148}
{"lr": 0.0015621920692348687, "data_time": 0.002127218246459961, "grad_norm": 0.32149524092674253, "loss": 0.477534818649292, "time": 1.551186203956604, "epoch": 97, "memory": 35394, "step": 30248}
{"lr": 0.0015618787082414716, "data_time": 0.0038061618804931642, "grad_norm": 0.32363958805799486, "loss": 0.4800867348909378, "time": 1.5054139614105224, "epoch": 97, "memory": 35394, "step": 30348}
{"lr": 0.0015615230921226355, "data_time": 0.1962672710418701, "grad_norm": 0.2659095421433449, "loss": 0.4735682010650635, "time": 1.4443482160568237, "epoch": 98, "memory": 35394, "step": 30461}
{"lr": 0.00156120704463517, "data_time": 0.002464628219604492, "grad_norm": 0.2671653091907501, "loss": 0.48004003763198855, "time": 1.5178703308105468, "epoch": 98, "memory": 35394, "step": 30561}
{"lr": 0.001560889736700662, "data_time": 0.004028129577636719, "grad_norm": 0.3026925966143608, "loss": 0.47298750281333923, "time": 1.5188317537307738, "epoch": 98, "memory": 35394, "step": 30661}
{"lr": 0.0015605296625094702, "data_time": 0.12118771076202392, "grad_norm": 0.356028738617897, "loss": 0.4779022365808487, "time": 1.5099666357040404, "epoch": 99, "memory": 35394, "step": 30774}
{"lr": 0.0015602096715768247, "data_time": 0.0026675701141357423, "grad_norm": 0.34837097972631453, "loss": 0.48326765894889834, "time": 1.4807327508926391, "epoch": 99, "memory": 35394, "step": 30874}
{"lr": 0.0015598884218486389, "data_time": 0.0036340951919555664, "grad_norm": 0.30335623770952225, "loss": 0.478547939658165, "time": 1.5443655967712402, "epoch": 99, "memory": 35394, "step": 30974}
{"lr": 0.0015595238954262893, "data_time": 0.3009273767471313, "grad_norm": 0.25532241314649584, "loss": 0.47321973741054535, "time": 1.490832781791687, "epoch": 100, "memory": 35394, "step": 31087}
{"lr": 0.0015591999662394225, "data_time": 0.002525591850280762, "grad_norm": 0.2454588383436203, "loss": 0.4694360733032227, "time": 1.5175131797790526, "epoch": 100, "memory": 35394, "step": 31187}
{"lr": 0.0015588747799289396, "data_time": 0.0020342588424682615, "grad_norm": 0.34122484028339384, "loss": 0.47171888053417205, "time": 1.423122763633728, "epoch": 100, "memory": 35394, "step": 31287}
{"lr": 0.0015585058071888533, "data_time": 0.25039334297180177, "grad_norm": 0.27993002980947496, "loss": 0.47857773005962373, "time": 1.5108296155929566, "epoch": 101, "memory": 35394, "step": 31400}
{"lr": 0.0015581779450026124, "data_time": 0.0028077840805053713, "grad_norm": 0.28988301903009417, "loss": 0.476138111948967, "time": 1.5729996681213378, "epoch": 101, "memory": 35394, "step": 31500}
{"lr": 0.001557848827385073, "data_time": 0.0027988195419311524, "grad_norm": 0.31916010528802874, "loss": 0.4801072657108307, "time": 1.460622239112854, "epoch": 101, "memory": 35394, "step": 31600}
{"lr": 0.0015574754143128063, "data_time": 0.14438023567199706, "grad_norm": 0.2767644554376602, "loss": 0.4820625364780426, "time": 1.5769403457641602, "epoch": 102, "memory": 35394, "step": 31713}
{"lr": 0.001557143624445839, "data_time": 0.0021559476852416994, "grad_norm": 0.37751572728157046, "loss": 0.4752889037132263, "time": 1.5751543283462524, "epoch": 102, "memory": 35394, "step": 31813}
{"lr": 0.001556810580860257, "data_time": 0.003692626953125, "grad_norm": 0.27924744784832, "loss": 0.47452669143676757, "time": 1.5127322196960449, "epoch": 102, "memory": 35394, "step": 31913}
{"lr": 0.0015564327335133923, "data_time": 0.032277393341064456, "grad_norm": 0.2581332340836525, "loss": 0.4747571676969528, "time": 1.5106006383895874, "epoch": 103, "memory": 35394, "step": 32026}
{"lr": 0.0015560970213480646, "data_time": 0.002351045608520508, "grad_norm": 0.2734809070825577, "loss": 0.4784164637327194, "time": 1.5012293338775635, "epoch": 103, "memory": 35394, "step": 32126}
{"lr": 0.0015557600571971416, "data_time": 0.0028806686401367187, "grad_norm": 0.2692338898777962, "loss": 0.4778044521808624, "time": 1.531997013092041, "epoch": 103, "memory": 35394, "step": 32226}
{"lr": 0.0015553777817051972, "data_time": 0.6598567724227905, "grad_norm": 0.28214792609214784, "loss": 0.4746577203273773, "time": 2.4955666303634643, "epoch": 104, "memory": 35394, "step": 32339}
{"lr": 0.0015550381526875014, "data_time": 0.0019818782806396485, "grad_norm": 0.34435420781373977, "loss": 0.4788251519203186, "time": 2.386242914199829, "epoch": 104, "memory": 35394, "step": 32439}
{"lr": 0.0015546972734375396, "data_time": 0.002129817008972168, "grad_norm": 0.39464961290359496, "loss": 0.4793114423751831, "time": 2.5850130558013915, "epoch": 104, "memory": 35394, "step": 32539}
{"lr": 0.0015543105760018644, "data_time": 1.1868463516235352, "grad_norm": 0.2863398939371109, "loss": 0.47232466340065005, "time": 2.3507082223892213, "epoch": 105, "memory": 35394, "step": 32652}
{"lr": 0.00155396703564133, "data_time": 0.6146623134613037, "grad_norm": 0.31070339381694795, "loss": 0.47495160102844236, "time": 2.2180189371109007, "epoch": 105, "memory": 35394, "step": 32752}
{"lr": 0.001553622246822145, "data_time": 0.17783138751983643, "grad_norm": 0.28727133125066756, "loss": 0.47763410210609436, "time": 2.938042402267456, "epoch": 105, "memory": 35394, "step": 32852}
{"lr": 0.001553231133715831, "data_time": 0.9687827587127685, "grad_norm": 0.3745904445648193, "loss": 0.4758849382400513, "time": 2.523042392730713, "epoch": 106, "memory": 35394, "step": 32965}
{"lr": 0.0015528836875854416, "data_time": 0.28514552116394043, "grad_norm": 0.2848851397633553, "loss": 0.4774456948041916, "time": 2.379490041732788, "epoch": 106, "memory": 35394, "step": 33065}
{"lr": 0.0015525349947902673, "data_time": 0.0028421878814697266, "grad_norm": 0.275187349319458, "loss": 0.4782686918973923, "time": 2.449995279312134, "epoch": 106, "memory": 35394, "step": 33165}
{"lr": 0.001552139472358029, "data_time": 0.854512882232666, "grad_norm": 0.2741192728281021, "loss": 0.4699945032596588, "time": 3.7374187707901, "epoch": 107, "memory": 35394, "step": 33278}
{"lr": 0.0015517881260941307, "data_time": 0.5611419677734375, "grad_norm": 0.35478782653808594, "loss": 0.47467002272605896, "time": 2.5739826917648316, "epoch": 107, "memory": 35394, "step": 33378}
{"lr": 0.0015514355349795339, "data_time": 0.0024090051651000977, "grad_norm": 0.2721721678972244, "loss": 0.48151666224002837, "time": 2.5377068758010863, "epoch": 107, "memory": 35394, "step": 33478}
{"lr": 0.0015510356096376202, "data_time": 0.47487564086914064, "grad_norm": 0.2993902176618576, "loss": 0.472411972284317, "time": 2.3266283750534056, "epoch": 108, "memory": 35394, "step": 33591}
{"lr": 0.0015506803689398225, "data_time": 0.002274942398071289, "grad_norm": 0.34617309719324113, "loss": 0.4799539357423782, "time": 2.508750796318054, "epoch": 108, "memory": 35394, "step": 33691}
{"lr": 0.001550323885225609, "data_time": 0.002733469009399414, "grad_norm": 0.3418157771229744, "loss": 0.47598194479942324, "time": 2.9010077238082888, "epoch": 108, "memory": 35394, "step": 33791}
{"lr": 0.00154991956346169, "data_time": 0.6746679544448853, "grad_norm": 0.2776546910405159, "loss": 0.47613321542739867, "time": 2.7318278789520263, "epoch": 109, "memory": 35394, "step": 33904}
{"lr": 0.001549560434092782, "data_time": 0.0028600215911865233, "grad_norm": 0.2656896784901619, "loss": 0.4709941059350967, "time": 2.484470987319946, "epoch": 109, "memory": 35394, "step": 34004}
{"lr": 0.001549200063561908, "data_time": 0.0022194385528564453, "grad_norm": 0.34455113410949706, "loss": 0.47804089188575744, "time": 2.558576726913452, "epoch": 109, "memory": 35394, "step": 34104}
{"lr": 0.001548791351934975, "data_time": 1.1264537811279296, "grad_norm": 0.267319031059742, "loss": 0.47937820553779603, "time": 2.354544472694397, "epoch": 110, "memory": 35394, "step": 34217}
{"lr": 0.001548428339720828, "data_time": 0.2083815813064575, "grad_norm": 0.26803170293569567, "loss": 0.47204806506633756, "time": 2.0963058710098266, "epoch": 110, "memory": 35394, "step": 34317}
{"lr": 0.0015480640882192948, "data_time": 0.009103155136108399, "grad_norm": 0.23410028964281082, "loss": 0.47672114670276644, "time": 2.6140048503875732, "epoch": 110, "memory": 35394, "step": 34417}
{"lr": 0.0015476509933595535, "data_time": 0.00215001106262207, "grad_norm": 0.31795505434274673, "loss": 0.479699844121933, "time": 2.2621094942092896, "epoch": 111, "memory": 35394, "step": 34530}
{"lr": 0.001547284104189027, "data_time": 0.002465176582336426, "grad_norm": 0.2986157774925232, "loss": 0.4762400180101395, "time": 2.474950838088989, "epoch": 111, "memory": 35394, "step": 34630}
{"lr": 0.0015469159776257998, "data_time": 0.00265355110168457, "grad_norm": 0.33994347006082537, "loss": 0.4706627368927002, "time": 2.835798478126526, "epoch": 111, "memory": 35394, "step": 34730}
{"lr": 0.0015464985062345602, "data_time": 0.600957703590393, "grad_norm": 0.2812295719981194, "loss": 0.47626870274543764, "time": 3.0044139862060546, "epoch": 112, "memory": 35394, "step": 34843}
{"lr": 0.0015461277460594067, "data_time": 0.14665694236755372, "grad_norm": 0.3102095380425453, "loss": 0.4778727263212204, "time": 2.45968816280365, "epoch": 112, "memory": 35394, "step": 34943}
{"lr": 0.001545755750406312, "data_time": 0.002804994583129883, "grad_norm": 0.2971613436937332, "loss": 0.4795677363872528, "time": 2.907648801803589, "epoch": 112, "memory": 35394, "step": 35043}
{"lr": 0.0015453339092558833, "data_time": 1.3868919134140014, "grad_norm": 0.33067126125097274, "loss": 0.47298039495944977, "time": 2.775256872177124, "epoch": 113, "memory": 35394, "step": 35156}
{"lr": 0.001544959284090648, "data_time": 0.002278709411621094, "grad_norm": 0.2808442205190659, "loss": 0.47416658997535704, "time": 2.2853643178939818, "epoch": 113, "memory": 35394, "step": 35256}
{"lr": 0.0015445834253822754, "data_time": 0.002661991119384766, "grad_norm": 0.28922703862190247, "loss": 0.47980514764785764, "time": 2.450971841812134, "epoch": 113, "memory": 35394, "step": 35356}
{"lr": 0.0015441572213158517, "data_time": 0.8556462287902832, "grad_norm": 0.26235984116792677, "loss": 0.47040835320949553, "time": 2.2731029272079466, "epoch": 114, "memory": 35394, "step": 35469}
{"lr": 0.0015437787372377869, "data_time": 0.002801346778869629, "grad_norm": 0.23983387798070907, "loss": 0.4785834699869156, "time": 2.2511993169784548, "epoch": 114, "memory": 35394, "step": 35569}
{"lr": 0.0015433990215713953, "data_time": 0.0024569034576416016, "grad_norm": Infinity, "loss": 0.46883452832698824, "time": 2.7086756944656374, "epoch": 114, "memory": 35394, "step": 35669}
{"lr": 0.0015429684615029461, "data_time": 0.6798620223999023, "grad_norm": 0.2998159185051918, "loss": 0.4738893687725067, "time": 2.2724390506744383, "epoch": 115, "memory": 35394, "step": 35782}
{"lr": 0.0015425861246518952, "data_time": 0.002052760124206543, "grad_norm": 0.30916945934295653, "loss": 0.46914381086826323, "time": 2.3349886894226075, "epoch": 115, "memory": 35394, "step": 35882}
{"lr": 0.00154220255818731, "data_time": 0.0028044939041137694, "grad_norm": 0.3078797787427902, "loss": 0.4744331270456314, "time": 2.3280357122421265, "epoch": 115, "memory": 35394, "step": 35982}
{"lr": 0.0015417676491014717, "data_time": 1.2012667894363402, "grad_norm": 0.304323798418045, "loss": 0.47303786873817444, "time": 2.4665371417999267, "epoch": 116, "memory": 35394, "step": 36095}
{"lr": 0.0015413814656797858, "data_time": 0.7634369611740113, "grad_norm": 0.3097640335559845, "loss": 0.4812522649765015, "time": 2.496894598007202, "epoch": 116, "memory": 35394, "step": 36195}
{"lr": 0.0015409940546393026, "data_time": 0.0019404172897338867, "grad_norm": 0.2636282995343208, "loss": 0.474294176697731, "time": 2.3835542678833006, "epoch": 116, "memory": 35394, "step": 36295}
{"lr": 0.00154055480359126, "data_time": 0.8499520778656006, "grad_norm": 0.2867503181099892, "loss": 0.47461034655570983, "time": 2.2687394857406615, "epoch": 117, "memory": 35394, "step": 36408}
{"lr": 0.001540164779863683, "data_time": 0.0019910573959350587, "grad_norm": 0.29721391946077347, "loss": 0.47330630719661715, "time": 2.407616209983826, "epoch": 117, "memory": 35394, "step": 36508}
{"lr": 0.0015397735305319642, "data_time": 0.002522563934326172, "grad_norm": 0.36756914258003237, "loss": 0.4714828014373779, "time": 2.374008297920227, "epoch": 117, "memory": 35394, "step": 36608}
{"lr": 0.0015393299446473377, "data_time": 0.6163173675537109, "grad_norm": 0.271674382686615, "loss": 0.47720256745815276, "time": 2.4831499338150023, "epoch": 118, "memory": 35394, "step": 36721}
{"lr": 0.0015389360869409177, "data_time": 0.0022066831588745117, "grad_norm": 0.2805375397205353, "loss": 0.46763473451137544, "time": 2.3052737712860107, "epoch": 118, "memory": 35394, "step": 36821}
{"lr": 0.001538541005664889, "data_time": 0.0026031970977783204, "grad_norm": Infinity, "loss": 0.47679533064365387, "time": 2.1638134241104128, "epoch": 118, "memory": 35394, "step": 36921}
{"lr": 0.0015380930921396227, "data_time": 0.8020439147949219, "grad_norm": 0.31943307518959047, "loss": 0.470036044716835, "time": 2.1892340660095213, "epoch": 119, "memory": 35394, "step": 37034}
{"lr": 0.001537695406843602, "data_time": 0.2944634914398193, "grad_norm": 0.32586196064949036, "loss": 0.47273361384868623, "time": 2.70890212059021, "epoch": 119, "memory": 35394, "step": 37134}
{"lr": 0.001537296500032355, "data_time": 0.0025515079498291014, "grad_norm": 0.31721313297748566, "loss": 0.47995749711990354, "time": 2.7565740585327148, "epoch": 119, "memory": 35394, "step": 37234}
{"lr": 0.0015368442661325933, "data_time": 1.712332010269165, "grad_norm": 0.333935572206974, "loss": 0.4773733586072922, "time": 3.4199946880340577, "epoch": 120, "memory": 35394, "step": 37347}
{"lr": 0.001536442759698301, "data_time": 0.23548977375030516, "grad_norm": 0.3427802473306656, "loss": 0.48014276623725893, "time": 2.201482820510864, "epoch": 120, "memory": 35394, "step": 37447}
{"lr": 0.0015360400338229867, "data_time": 0.002941274642944336, "grad_norm": 0.2894239202141762, "loss": 0.4689519852399826, "time": 2.3982590675354003, "epoch": 120, "memory": 35394, "step": 37547}
{"lr": 0.0015355834868849643, "data_time": 1.0368220806121826, "grad_norm": 0.23196182250976563, "loss": 0.47305192053318024, "time": 2.387797808647156, "epoch": 121, "memory": 35394, "step": 37660}
{"lr": 0.0015351781658257272, "data_time": 0.25934438705444335, "grad_norm": 0.2624538719654083, "loss": 0.4708796739578247, "time": 2.274696159362793, "epoch": 121, "memory": 35394, "step": 37760}
{"lr": 0.0015347716274194466, "data_time": 0.0035271406173706054, "grad_norm": 0.30069454461336137, "loss": 0.47127523422241213, "time": 2.4653364181518556, "epoch": 121, "memory": 35394, "step": 37860}
{"lr": 0.00153431077484936, "data_time": 0.7727572441101074, "grad_norm": 0.2910318002104759, "loss": 0.4756147235631943, "time": 2.43692467212677, "epoch": 122, "memory": 35394, "step": 37973}
{"lr": 0.001533901645740376, "data_time": 0.4026294469833374, "grad_norm": 0.31368475705385207, "loss": 0.47683907151222227, "time": 2.303330183029175, "epoch": 122, "memory": 35394, "step": 38073}
{"lr": 0.0015334913013980787, "data_time": 0.0028315067291259767, "grad_norm": 0.261027005314827, "loss": 0.47640861868858336, "time": 2.4231287240982056, "epoch": 122, "memory": 35394, "step": 38173}
{"lr": 0.0015330261506719743, "data_time": 1.4167730569839478, "grad_norm": 0.3147707641124725, "loss": 0.4704655587673187, "time": 2.7263286828994753, "epoch": 123, "memory": 35394, "step": 38286}
{"lr": 0.0015326132201502218, "data_time": 0.4114459276199341, "grad_norm": 0.30759001076221465, "loss": 0.47119708359241486, "time": 2.863646078109741, "epoch": 123, "memory": 35394, "step": 38386}
{"lr": 0.0015321990765286002, "data_time": 0.002679133415222168, "grad_norm": 0.34023456573486327, "loss": 0.48147891759872435, "time": 2.969323921203613, "epoch": 123, "memory": 35394, "step": 38486}
{"lr": 0.0015317296351922513, "data_time": 0.7105276346206665, "grad_norm": 0.28743119686841967, "loss": 0.4709644913673401, "time": 2.364086413383484, "epoch": 124, "memory": 35394, "step": 38599}
{"lr": 0.0015313129099563767, "data_time": 0.16751577854156494, "grad_norm": 0.3098493814468384, "loss": 0.4730879068374634, "time": 2.532492423057556, "epoch": 124, "memory": 35394, "step": 38699}
{"lr": 0.001530894973773754, "data_time": 0.0025215864181518553, "grad_norm": 0.2907585933804512, "loss": 0.47205414474010465, "time": 2.7943923234939576, "epoch": 124, "memory": 35394, "step": 38799}
{"lr": 0.0015304212494425353, "data_time": 0.6854078769683838, "grad_norm": 0.32449092119932177, "loss": 0.4748063266277313, "time": 2.1547383785247805, "epoch": 125, "memory": 35394, "step": 38912}
{"lr": 0.0015300007362527414, "data_time": 0.0021018028259277345, "grad_norm": 0.27660332918167113, "loss": 0.4772142440080643, "time": 1.4772688150405884, "epoch": 125, "memory": 35394, "step": 39012}
{"lr": 0.001529579014288964, "data_time": 0.0028724431991577148, "grad_norm": 0.2887726783752441, "loss": 0.4672374755144119, "time": 1.5354890823364258, "epoch": 125, "memory": 35394, "step": 39112}
{"lr": 0.0015291010146477298, "data_time": 0.19482796192169188, "grad_norm": 0.33580284416675565, "loss": 0.4699686974287033, "time": 1.5044360637664795, "epoch": 126, "memory": 35394, "step": 39225}
{"lr": 0.0015286767203256664, "data_time": 0.0030896902084350587, "grad_norm": 0.3206571951508522, "loss": 0.4643645346164703, "time": 1.5261787176132202, "epoch": 126, "memory": 35394, "step": 39325}
{"lr": 0.0015282512194219988, "data_time": 0.0034430265426635743, "grad_norm": 0.34176985174417496, "loss": 0.48017332553863523, "time": 1.5261290788650512, "epoch": 126, "memory": 35394, "step": 39425}
{"lr": 0.001527768952224957, "data_time": 0.14938838481903077, "grad_norm": 0.24689465165138244, "loss": 0.47475376129150393, "time": 1.5154525518417359, "epoch": 127, "memory": 35394, "step": 39538}
{"lr": 0.0015273408836536154, "data_time": 0.003107500076293945, "grad_norm": 0.3049063727259636, "loss": 0.47641478180885316, "time": 1.5229097127914428, "epoch": 127, "memory": 35394, "step": 39638}
{"lr": 0.0015269116107126165, "data_time": 0.0037434577941894533, "grad_norm": 0.28052459061145785, "loss": 0.47288874685764315, "time": 1.474844217300415, "epoch": 127, "memory": 35394, "step": 39738}
{"lr": 0.0015264250837832055, "data_time": 0.49034335613250735, "grad_norm": 0.3498168483376503, "loss": 0.4748812079429626, "time": 1.5090142488479614, "epoch": 128, "memory": 35394, "step": 39851}
{"lr": 0.001525993247906804, "data_time": 0.18855619430541992, "grad_norm": 0.34499490559101104, "loss": 0.4765111058950424, "time": 1.5513411521911622, "epoch": 128, "memory": 35394, "step": 39951}
{"lr": 0.001525560209892234, "data_time": 0.0036005258560180666, "grad_norm": 0.2641161888837814, "loss": 0.4696519821882248, "time": 1.5414490222930908, "epoch": 128, "memory": 35394, "step": 40051}
{"lr": 0.0015250694311229956, "data_time": 0.19107959270477295, "grad_norm": 0.3335075691342354, "loss": 0.4698716074228287, "time": 1.550611662864685, "epoch": 129, "memory": 35394, "step": 40164}
{"lr": 0.0015246338349468656, "data_time": 0.003242301940917969, "grad_norm": 0.3005190253257751, "loss": 0.47449831366539, "time": 1.392116403579712, "epoch": 129, "memory": 35394, "step": 40264}
{"lr": 0.0015241970388835528, "data_time": 0.003750300407409668, "grad_norm": 0.3065024048089981, "loss": 0.48185897469520567, "time": 1.5068603038787842, "epoch": 129, "memory": 35394, "step": 40364}
{"lr": 0.0015237020162360037, "data_time": 0.19566068649291993, "grad_norm": 0.3168582618236542, "loss": 0.47158542573451995, "time": 1.5115906953811646, "epoch": 130, "memory": 35394, "step": 40477}
{"lr": 0.0015232626668264765, "data_time": 0.002096748352050781, "grad_norm": 0.3261592507362366, "loss": 0.47721263468265535, "time": 1.5713975429534912, "epoch": 130, "memory": 35394, "step": 40577}
{"lr": 0.0015228221198002208, "data_time": 0.003211212158203125, "grad_norm": 0.2915771767497063, "loss": 0.47142797112464907, "time": 1.5342297315597535, "epoch": 130, "memory": 35394, "step": 40677}
{"lr": 0.0015223228613047185, "data_time": 0.5008122205734253, "grad_norm": 0.3279244422912598, "loss": 0.47035797238349913, "time": 1.9202666282653809, "epoch": 131, "memory": 35394, "step": 40790}
{"lr": 0.0015218797657890106, "data_time": 0.00253148078918457, "grad_norm": 0.32197755873203276, "loss": 0.46839495599269865, "time": 1.477695918083191, "epoch": 131, "memory": 35394, "step": 40890}
{"lr": 0.001521435474946456, "data_time": 0.0034786462783813477, "grad_norm": 0.36583139598369596, "loss": 0.47383653223514555, "time": 1.504993987083435, "epoch": 131, "memory": 35394, "step": 40990}
{"lr": 0.0015209319887020767, "data_time": 0.44917752742767336, "grad_norm": 0.2731630265712738, "loss": 0.4720313549041748, "time": 1.5361457109451293, "epoch": 132, "memory": 35394, "step": 41103}
{"lr": 0.001520485154268176, "data_time": 0.002744293212890625, "grad_norm": Infinity, "loss": 0.4715107917785645, "time": 1.4997326135635376, "epoch": 132, "memory": 35394, "step": 41203}
{"lr": 0.0015200371268167007, "data_time": 0.0024310588836669923, "grad_norm": 0.30857538878917695, "loss": 0.4681195944547653, "time": 1.4852517127990723, "epoch": 132, "memory": 35394, "step": 41303}
{"lr": 0.001519529420991101, "data_time": 0.26391384601593015, "grad_norm": 0.3093553677201271, "loss": 0.47253804504871366, "time": 1.5403441667556763, "epoch": 133, "memory": 35394, "step": 41416}
{"lr": 0.0015190788548876477, "data_time": 0.0020383596420288086, "grad_norm": 0.2796496286988258, "loss": 0.4730890214443207, "time": 1.5757716417312622, "epoch": 133, "memory": 35394, "step": 41516}
{"lr": 0.0015186270980952444, "data_time": 0.002057051658630371, "grad_norm": 0.3021573320031166, "loss": 0.4744404435157776, "time": 1.5458202838897706, "epoch": 133, "memory": 35394, "step": 41616}
{"lr": 0.0015181151809245358, "data_time": 0.038042521476745604, "grad_norm": 0.2983107894659042, "loss": 0.474787500500679, "time": 1.5647557258605957, "epoch": 134, "memory": 35394, "step": 41729}
{"lr": 0.0015176608904607074, "data_time": 0.0022043466567993166, "grad_norm": 0.29705182313919065, "loss": 0.47012669444084165, "time": 1.5464214086532593, "epoch": 134, "memory": 35394, "step": 41829}
{"lr": 0.001517205411655871, "data_time": 0.003471970558166504, "grad_norm": 0.2771416500210762, "loss": 0.47402224838733675, "time": 1.5363382577896119, "epoch": 134, "memory": 35394, "step": 41929}
{"lr": 0.0015166892914444787, "data_time": 0.14738709926605226, "grad_norm": 0.3622789531946182, "loss": 0.4740449368953705, "time": 1.5409710645675658, "epoch": 135, "memory": 35394, "step": 42042}
{"lr": 0.0015162312839898696, "data_time": 0.002611684799194336, "grad_norm": 0.2911794900894165, "loss": 0.4765596240758896, "time": 1.6071103096008301, "epoch": 135, "memory": 35394, "step": 42142}
{"lr": 0.0015157720905614703, "data_time": 0.003503584861755371, "grad_norm": 0.3186375185847282, "loss": 0.4730557233095169, "time": 1.5241606950759887, "epoch": 135, "memory": 35394, "step": 42242}
{"lr": 0.0015152517756820054, "data_time": 0.0023502826690673826, "grad_norm": 0.29932925999164584, "loss": 0.470887354016304, "time": 1.2674506664276124, "epoch": 136, "memory": 35394, "step": 42355}
{"lr": 0.0015147900586665066, "data_time": 0.001971721649169922, "grad_norm": 0.32039225697517393, "loss": 0.4695597171783447, "time": 1.586123490333557, "epoch": 136, "memory": 35394, "step": 42455}
{"lr": 0.001514327158063676, "data_time": 0.002516341209411621, "grad_norm": 0.2506763473153114, "loss": 0.47509843707084654, "time": 1.5699755907058717, "epoch": 136, "memory": 35394, "step": 42555}
{"lr": 0.0015138026569567924, "data_time": 0.37915477752685545, "grad_norm": 0.3222238317131996, "loss": 0.4728421807289124, "time": 1.5916094303131103, "epoch": 137, "memory": 35394, "step": 42668}
{"lr": 0.0015133372378704742, "data_time": 0.0021453857421875, "grad_norm": 0.35739823430776596, "loss": 0.46524512469768525, "time": 1.5611443281173707, "epoch": 137, "memory": 35394, "step": 42768}
{"lr": 0.0015128706376024825, "data_time": 0.0025995492935180662, "grad_norm": 0.2964851394295692, "loss": 0.4682304412126541, "time": 1.574564027786255, "epoch": 137, "memory": 35394, "step": 42868}
{"lr": 0.0015123419587767459, "data_time": 0.32753925323486327, "grad_norm": 0.3161149948835373, "loss": 0.4761368244886398, "time": 1.60071120262146, "epoch": 138, "memory": 35394, "step": 42981}
{"lr": 0.0015118728451697324, "data_time": 0.0024407625198364256, "grad_norm": 0.3272458344697952, "loss": 0.4651777923107147, "time": 1.633575439453125, "epoch": 138, "memory": 35394, "step": 43081}
{"lr": 0.0015114025528058627, "data_time": 0.002880430221557617, "grad_norm": 0.3205661579966545, "loss": 0.47191317081451417, "time": 1.5771793842315673, "epoch": 138, "memory": 35394, "step": 43181}
{"lr": 0.0015108697048376106, "data_time": 0.34728777408599854, "grad_norm": 0.34943257868289945, "loss": 0.46983436942100526, "time": 1.5387550592422485, "epoch": 139, "memory": 35394, "step": 43294}
{"lr": 0.0015103969043199616, "data_time": 0.0021675825119018555, "grad_norm": 0.3067884311079979, "loss": 0.468424916267395, "time": 1.5409719944000244, "epoch": 139, "memory": 35394, "step": 43394}
{"lr": 0.001509922927489398, "data_time": 0.004285502433776856, "grad_norm": 0.30799558013677597, "loss": 0.47238331437110903, "time": 1.6027888536453248, "epoch": 139, "memory": 35394, "step": 43494}
{"lr": 0.0015093859190226028, "data_time": 0.17321910858154296, "grad_norm": 0.31200132220983506, "loss": 0.4691552758216858, "time": 1.5464394807815551, "epoch": 140, "memory": 35394, "step": 43607}
{"lr": 0.0015089094392641855, "data_time": 0.0023967981338500976, "grad_norm": 0.31260114908218384, "loss": 0.4732578545808792, "time": 1.3264846086502076, "epoch": 140, "memory": 35394, "step": 43707}
{"lr": 0.0015084317856558773, "data_time": 0.004052591323852539, "grad_norm": 0.30292615592479705, "loss": 0.46860927641391753, "time": 1.5729292392730714, "epoch": 140, "memory": 35394, "step": 43807}
{"lr": 0.0015078906254019993, "data_time": 0.6320759057998657, "grad_norm": 0.2721040934324265, "loss": 0.47513239085674286, "time": 1.6140924215316772, "epoch": 141, "memory": 35394, "step": 43920}
{"lr": 0.0015074104741323673, "data_time": 0.11618835926055908, "grad_norm": 0.32451860308647157, "loss": 0.4706403374671936, "time": 1.6014758348464966, "epoch": 141, "memory": 35394, "step": 44020}
{"lr": 0.0015069291514949103, "data_time": 0.00588679313659668, "grad_norm": 0.35921775698661806, "loss": 0.4727573275566101, "time": 1.5812970876693726, "epoch": 141, "memory": 35394, "step": 44120}
{"lr": 0.001506383848232766, "data_time": 0.16838064193725585, "grad_norm": 0.31906844973564147, "loss": 0.47091881930828094, "time": 1.6168460607528687, "epoch": 142, "memory": 35394, "step": 44233}
{"lr": 0.0015059000332410335, "data_time": 0.002537989616394043, "grad_norm": 0.29328656047582624, "loss": 0.4723560571670532, "time": 1.5641207218170166, "epoch": 142, "memory": 35394, "step": 44333}
{"lr": 0.0015054150493825434, "data_time": 0.0036371707916259765, "grad_norm": 0.2834935933351517, "loss": 0.47042456567287444, "time": 1.5626039981842041, "epoch": 142, "memory": 35394, "step": 44433}
{"lr": 0.001504865611958154, "data_time": 0.21551146507263183, "grad_norm": 0.30720393508672716, "loss": 0.4734711617231369, "time": 1.5661351680755615, "epoch": 143, "memory": 35394, "step": 44546}
{"lr": 0.001504378141092868, "data_time": 0.001907515525817871, "grad_norm": 0.32868328839540484, "loss": 0.4730159372091293, "time": 1.5759127140045166, "epoch": 143, "memory": 35394, "step": 44646}
{"lr": 0.0015038895038808524, "data_time": 0.0026702165603637697, "grad_norm": 0.29674152582883834, "loss": 0.46973936557769774, "time": 1.5905919551849366, "epoch": 143, "memory": 35394, "step": 44746}
{"lr": 0.0015033359412073067, "data_time": 0.09087450504302978, "grad_norm": 0.34119083136320116, "loss": 0.47113206088542936, "time": 1.5723473072052, "epoch": 144, "memory": 35394, "step": 44859}
{"lr": 0.0015028448223763182, "data_time": 0.0023058652877807617, "grad_norm": 0.3047960489988327, "loss": 0.47055254578590394, "time": 1.5917538166046143, "epoch": 144, "memory": 35394, "step": 44959}
{"lr": 0.0015023525397375535, "data_time": 0.002065730094909668, "grad_norm": 0.26329913139343264, "loss": 0.47065061032772065, "time": 1.2711148262023926, "epoch": 144, "memory": 35394, "step": 45059}
{"lr": 0.0015017948607948578, "data_time": 0.03730177879333496, "grad_norm": 0.346693417429924, "loss": 0.480545973777771, "time": 1.5721995830535889, "epoch": 145, "memory": 35394, "step": 45172}
{"lr": 0.0015013001019652005, "data_time": 0.0028921842575073244, "grad_norm": 0.2854103922843933, "loss": 0.47192174196243286, "time": 1.649791955947876, "epoch": 145, "memory": 35394, "step": 45272}
{"lr": 0.0015008041818855976, "data_time": 0.0035811424255371093, "grad_norm": 0.3206455335021019, "loss": 0.4643166273832321, "time": 1.548967432975769, "epoch": 145, "memory": 35394, "step": 45372}
{"lr": 0.0015002423957205383, "data_time": 0.12963173389434815, "grad_norm": 0.3148514047265053, "loss": 0.4729187905788422, "time": 1.571388053894043, "epoch": 146, "memory": 35394, "step": 45485}
{"lr": 0.0014997440049182929, "data_time": 0.0030606746673583984, "grad_norm": 0.3498793810606003, "loss": 0.47554251849651336, "time": 1.5870272874832154, "epoch": 146, "memory": 35394, "step": 45585}
{"lr": 0.0014992444554427684, "data_time": 0.0042170524597167965, "grad_norm": 0.26608274430036544, "loss": 0.4662046581506729, "time": 1.6061805009841919, "epoch": 146, "memory": 35394, "step": 45685}
{"lr": 0.001498678571168757, "data_time": 0.11592071056365967, "grad_norm": 0.31383865922689436, "loss": 0.47294823825359344, "time": 1.5943724870681764, "epoch": 147, "memory": 35394, "step": 45798}
{"lr": 0.001498176556478923, "data_time": 0.002811574935913086, "grad_norm": 0.2796703398227692, "loss": 0.46794947385787966, "time": 1.581102204322815, "epoch": 147, "memory": 35394, "step": 45898}
{"lr": 0.0014976733857112753, "data_time": 0.0039771080017089845, "grad_norm": 0.30807247161865237, "loss": 0.4732044190168381, "time": 1.5752888441085815, "epoch": 147, "memory": 35394, "step": 45998}
{"lr": 0.0014971034125082021, "data_time": 0.10904817581176758, "grad_norm": 0.2918838158249855, "loss": 0.4651418596506119, "time": 1.624099326133728, "epoch": 148, "memory": 35394, "step": 46111}
{"lr": 0.0014965977820745636, "data_time": 0.002595210075378418, "grad_norm": 0.2893535643815994, "loss": 0.47417936027050017, "time": 1.6079826593399047, "epoch": 148, "memory": 35394, "step": 46211}
{"lr": 0.0014960909981773312, "data_time": 0.0042816162109375, "grad_norm": 0.3283232867717743, "loss": 0.47832641899585726, "time": 1.566039228439331, "epoch": 148, "memory": 35394, "step": 46311}
{"lr": 0.0014955169452914195, "data_time": 0.13000166416168213, "grad_norm": 0.3233863115310669, "loss": 0.47028444707393646, "time": 1.5834303140640258, "epoch": 149, "memory": 35394, "step": 46424}
{"lr": 0.0014950077073164205, "data_time": 0.0029964447021484375, "grad_norm": 0.2751506999135017, "loss": 0.4683986932039261, "time": 1.6370922803878785, "epoch": 149, "memory": 35394, "step": 46524}
{"lr": 0.0014944973185107568, "data_time": 0.003937196731567383, "grad_norm": 0.2857546329498291, "loss": 0.4748973399400711, "time": 1.5427146434783936, "epoch": 149, "memory": 35394, "step": 46624}
{"lr": 0.0014939191952544138, "data_time": 0.36005001068115233, "grad_norm": 0.28427477180957794, "loss": 0.4741572141647339, "time": 1.7827362298965455, "epoch": 150, "memory": 35394, "step": 46737}
{"lr": 0.0014934063579990196, "data_time": 0.002691912651062012, "grad_norm": 0.3325843170285225, "loss": 0.4628150224685669, "time": 1.5725020885467529, "epoch": 150, "memory": 35394, "step": 46837}
{"lr": 0.0014928923725645578, "data_time": 0.004363369941711426, "grad_norm": 0.3092470571398735, "loss": 0.47266780138015746, "time": 1.5830974578857422, "epoch": 150, "memory": 35394, "step": 46937}
{"lr": 0.0014923101883162165, "data_time": 0.07581276893615722, "grad_norm": 0.270275866985321, "loss": 0.47284541428089144, "time": 1.304596757888794, "epoch": 151, "memory": 35394, "step": 47050}
{"lr": 0.0014917937600997767, "data_time": 0.0027956247329711916, "grad_norm": 0.27377980053424833, "loss": 0.4696218729019165, "time": 1.6255110502243042, "epoch": 151, "memory": 35394, "step": 47150}
{"lr": 0.0014912761863744958, "data_time": 0.0036408662796020507, "grad_norm": 0.31645669639110563, "loss": 0.46602497398853304, "time": 1.605357789993286, "epoch": 151, "memory": 35394, "step": 47250}
{"lr": 0.0014906899505784726, "data_time": 0.30923726558685305, "grad_norm": 0.3091286808252335, "loss": 0.473709636926651, "time": 1.5670611381530761, "epoch": 152, "memory": 35394, "step": 47363}
{"lr": 0.0014901699397785937, "data_time": 0.00244600772857666, "grad_norm": 0.3084916979074478, "loss": 0.4761521428823471, "time": 1.6771214246749877, "epoch": 152, "memory": 35394, "step": 47463}
{"lr": 0.0014896487861586812, "data_time": 0.003490114212036133, "grad_norm": 0.32409184277057645, "loss": 0.4705113410949707, "time": 1.6042071342468263, "epoch": 152, "memory": 35394, "step": 47563}
{"lr": 0.0014890585083250137, "data_time": 0.3131188154220581, "grad_norm": 0.3107855349779129, "loss": 0.47767581343650817, "time": 1.6287238121032714, "epoch": 153, "memory": 35394, "step": 47676}
{"lr": 0.0014885349233774183, "data_time": 0.003446507453918457, "grad_norm": 0.3455690875649452, "loss": 0.46917939484119414, "time": 1.6564335346221923, "epoch": 153, "memory": 35394, "step": 47776}
{"lr": 0.001488010198317138, "data_time": 0.005722451210021973, "grad_norm": 0.3352196663618088, "loss": 0.4713617116212845, "time": 1.603764271736145, "epoch": 153, "memory": 35394, "step": 47876}
{"lr": 0.001487415888021436, "data_time": 0.26746397018432616, "grad_norm": 0.32187616676092146, "loss": 0.46817113757133483, "time": 1.619875693321228, "epoch": 154, "memory": 35394, "step": 47989}
{"lr": 0.001486888737419829, "data_time": 0.0026735305786132813, "grad_norm": 0.30717985332012177, "loss": 0.46861488223075864, "time": 1.6405192136764526, "epoch": 154, "memory": 35394, "step": 48089}
{"lr": 0.0014863604494313777, "data_time": 0.002885746955871582, "grad_norm": 0.36062075942754745, "loss": 0.4672182947397232, "time": 1.568631935119629, "epoch": 154, "memory": 35394, "step": 48189}
{"lr": 0.001485762116314666, "data_time": 0.002520465850830078, "grad_norm": 0.2964134067296982, "loss": 0.4744022965431213, "time": 1.5978061199188232, "epoch": 155, "memory": 35394, "step": 48302}
{"lr": 0.0014852314086105944, "data_time": 0.0031003713607788085, "grad_norm": 0.30355859696865084, "loss": 0.4700329899787903, "time": 1.526203417778015, "epoch": 155, "memory": 35394, "step": 48402}
{"lr": 0.001484699566263972, "data_time": 0.0029410600662231447, "grad_norm": 0.2993287891149521, "loss": 0.4735792279243469, "time": 1.58156898021698, "epoch": 155, "memory": 35394, "step": 48502}
{"lr": 0.0014840972200325323, "data_time": 0.5037097454071044, "grad_norm": 0.30623253285884855, "loss": 0.47681485414505004, "time": 1.6003986835479735, "epoch": 156, "memory": 35394, "step": 48615}
{"lr": 0.0014835629638352446, "data_time": 0.001956534385681152, "grad_norm": 0.2953531578183174, "loss": 0.4732819557189941, "time": 1.6578787326812745, "epoch": 156, "memory": 35394, "step": 48715}
{"lr": 0.0014830275757581064, "data_time": 0.003377413749694824, "grad_norm": 0.3330954760313034, "loss": 0.4673239678144455, "time": 1.5725520610809327, "epoch": 156, "memory": 35394, "step": 48815}
{"lr": 0.0014824212261833267, "data_time": 0.2841099977493286, "grad_norm": 0.28484435975551603, "loss": 0.47126721739768984, "time": 1.6001540899276734, "epoch": 157, "memory": 35394, "step": 48928}
{"lr": 0.001481883430159637, "data_time": 0.0023694992065429687, "grad_norm": 0.3198865905404091, "loss": 0.47070844769477843, "time": 1.552884840965271, "epoch": 157, "memory": 35394, "step": 49028}
{"lr": 0.0014813445050371591, "data_time": 0.0029846906661987306, "grad_norm": 0.33208000361919404, "loss": 0.4663455218076706, "time": 1.518911027908325, "epoch": 157, "memory": 35394, "step": 49128}
{"lr": 0.001480734161955368, "data_time": 0.3941207408905029, "grad_norm": 0.3433395907282829, "loss": 0.4715517222881317, "time": 1.638029408454895, "epoch": 158, "memory": 35394, "step": 49241}
{"lr": 0.0014801928348295173, "data_time": 0.0031558752059936525, "grad_norm": 0.3192254096269608, "loss": 0.4763785690069199, "time": 1.62542622089386, "epoch": 158, "memory": 35394, "step": 49341}
{"lr": 0.0014796503814042544, "data_time": 0.002646923065185547, "grad_norm": 0.28821857571601867, "loss": 0.47571446299552916, "time": 1.61024751663208, "epoch": 158, "memory": 35394, "step": 49441}
{"lr": 0.0014790360547165633, "data_time": 0.01858358383178711, "grad_norm": 0.3017184779047966, "loss": 0.4725464254617691, "time": 1.6596276760101318, "epoch": 159, "memory": 35394, "step": 49554}
{"lr": 0.0014784912052700726, "data_time": 0.0025273561477661133, "grad_norm": 0.28465391099452975, "loss": 0.4716621726751328, "time": 1.6250368356704712, "epoch": 159, "memory": 35394, "step": 49654}
{"lr": 0.0014779452323418166, "data_time": 0.002746248245239258, "grad_norm": 0.300199456512928, "loss": 0.47358458638191225, "time": 1.4314955234527589, "epoch": 159, "memory": 35394, "step": 49754}
{"lr": 0.0014773269320139627, "data_time": 0.18355016708374022, "grad_norm": 0.3220036417245865, "loss": 0.4755717903375626, "time": 1.629727339744568, "epoch": 160, "memory": 35394, "step": 49867}
{"lr": 0.0014767785690854918, "data_time": 0.0024678945541381837, "grad_norm": 0.23047104328870774, "loss": 0.4716655194759369, "time": 1.5990084648132323, "epoch": 160, "memory": 35394, "step": 49967}
{"lr": 0.0014762290855111326, "data_time": 0.002290940284729004, "grad_norm": 0.3267011672258377, "loss": 0.47424568235874176, "time": 1.5999961137771606, "epoch": 160, "memory": 35394, "step": 50067}
{"lr": 0.0014756068215733087, "data_time": 0.33799846172332765, "grad_norm": 0.29575192034244535, "loss": 0.4722161263227463, "time": 1.6328387022018434, "epoch": 161, "memory": 35394, "step": 50180}
{"lr": 0.001475054954058516, "data_time": 0.0021759510040283204, "grad_norm": 0.299102284014225, "loss": 0.4716184318065643, "time": 1.5924754381179809, "epoch": 161, "memory": 35394, "step": 50280}
{"lr": 0.001474501968751886, "data_time": 0.0033112525939941405, "grad_norm": 0.32757401317358015, "loss": 0.46959575414657595, "time": 1.5923465013504028, "epoch": 161, "memory": 35394, "step": 50380}
{"lr": 0.0014738757512985882, "data_time": 0.34128973484039304, "grad_norm": 0.310217122733593, "loss": 0.4665906071662903, "time": 1.632906436920166, "epoch": 162, "memory": 35394, "step": 50493}
{"lr": 0.0014733203881499815, "data_time": 0.04834134578704834, "grad_norm": 0.28347910195589066, "loss": 0.47699289321899413, "time": 1.6181833505630494, "epoch": 162, "memory": 35394, "step": 50593}
{"lr": 0.001472763910081725, "data_time": 0.003319573402404785, "grad_norm": 0.31539918333292005, "loss": 0.465120866894722, "time": 1.5863910913467407, "epoch": 162, "memory": 35394, "step": 50693}
{"lr": 0.0014721337492715834, "data_time": 0.15201196670532227, "grad_norm": 0.3342414930462837, "loss": 0.47482910454273225, "time": 1.6119104146957397, "epoch": 163, "memory": 35394, "step": 50806}
{"lr": 0.0014715748994983814, "data_time": 0.002240300178527832, "grad_norm": 0.3072001338005066, "loss": 0.46688192784786225, "time": 1.6734078884124757, "epoch": 163, "memory": 35394, "step": 50906}
{"lr": 0.001471014937695802, "data_time": 0.0037619829177856444, "grad_norm": 0.3387645870447159, "loss": 0.47213382720947267, "time": 1.5791921377182008, "epoch": 163, "memory": 35394, "step": 51006}
{"lr": 0.0014703808437514154, "data_time": 0.31424460411071775, "grad_norm": 0.2779496029019356, "loss": 0.47706534564495084, "time": 1.5299838542938233, "epoch": 164, "memory": 35394, "step": 51119}
{"lr": 0.001469818516419394, "data_time": 0.10404837131500244, "grad_norm": 0.3318481624126434, "loss": 0.4695691138505936, "time": 1.581681203842163, "epoch": 164, "memory": 35394, "step": 51219}
{"lr": 0.0014692550799663042, "data_time": 0.004044198989868164, "grad_norm": 0.3341397136449814, "loss": 0.47169141471385956, "time": 1.6099351644515991, "epoch": 164, "memory": 35394, "step": 51319}
{"lr": 0.0014686170631740764, "data_time": 0.19734044075012208, "grad_norm": 0.3353234946727753, "loss": 0.4649890035390854, "time": 1.5579856157302856, "epoch": 165, "memory": 35394, "step": 51432}
{"lr": 0.0014680512674054252, "data_time": 0.002067089080810547, "grad_norm": 0.3522042319178581, "loss": 0.47065396308898927, "time": 1.5947278976440429, "epoch": 165, "memory": 35394, "step": 51532}
{"lr": 0.0014674843654420126, "data_time": 0.0027135372161865234, "grad_norm": 0.3112789332866669, "loss": 0.471019035577774, "time": 1.6261058568954467, "epoch": 165, "memory": 35394, "step": 51632}
{"lr": 0.001466842436151985, "data_time": 0.22865581512451172, "grad_norm": 0.3128651022911072, "loss": 0.4680274575948715, "time": 1.6469911336898804, "epoch": 166, "memory": 35394, "step": 51745}
{"lr": 0.0014662731811251623, "data_time": 0.0021335363388061525, "grad_norm": 0.3000779315829277, "loss": 0.4695570707321167, "time": 1.6142293214797974, "epoch": 166, "memory": 35394, "step": 51845}
{"lr": 0.0014657028228478267, "data_time": 0.0025188446044921873, "grad_norm": 0.2814828917384148, "loss": 0.469571641087532, "time": 1.6100864171981812, "epoch": 166, "memory": 35394, "step": 51945}
{"lr": 0.0014650569914735098, "data_time": 0.31082701683044434, "grad_norm": 0.3241402983665466, "loss": 0.4728777289390564, "time": 1.583799386024475, "epoch": 167, "memory": 35394, "step": 52058}
{"lr": 0.0014644842864230845, "data_time": 0.002270364761352539, "grad_norm": 0.3354922115802765, "loss": 0.4704232454299927, "time": 1.6061145782470703, "epoch": 167, "memory": 35394, "step": 52158}
{"lr": 0.0014639104810842945, "data_time": 0.002911663055419922, "grad_norm": 0.29982946515083314, "loss": 0.472779381275177, "time": 1.5436385869979858, "epoch": 167, "memory": 35394, "step": 52258}
{"lr": 0.0014632607581024956, "data_time": 0.236198353767395, "grad_norm": 0.2983123317360878, "loss": 0.4703411251306534, "time": 1.590097689628601, "epoch": 168, "memory": 35394, "step": 52371}
{"lr": 0.0014626846123190105, "data_time": 0.0023493051528930666, "grad_norm": 0.28447268456220626, "loss": 0.4725374698638916, "time": 1.276786708831787, "epoch": 168, "memory": 35394, "step": 52471}
{"lr": 0.0014621073692271571, "data_time": 0.003572845458984375, "grad_norm": 0.3292911469936371, "loss": 0.47605222165584565, "time": 1.5832063913345338, "epoch": 168, "memory": 35394, "step": 52571}
{"lr": 0.0014614537651778201, "data_time": 0.278347110748291, "grad_norm": 0.2785572946071625, "loss": 0.46927526891231536, "time": 1.6264643669128418, "epoch": 169, "memory": 35394, "step": 52684}
{"lr": 0.0014608741880076331, "data_time": 0.0021438837051391602, "grad_norm": 0.307490274310112, "loss": 0.4716922968626022, "time": 1.5528220415115357, "epoch": 169, "memory": 35394, "step": 52784}
{"lr": 0.0014602935165268732, "data_time": 0.002855539321899414, "grad_norm": 0.36514215767383573, "loss": 0.46926130950450895, "time": 1.5972035884857179, "epoch": 169, "memory": 35394, "step": 52884}
{"lr": 0.0014596360420128986, "data_time": 0.20681099891662597, "grad_norm": 0.29095001220703126, "loss": 0.4657418817281723, "time": 1.6126886367797852, "epoch": 170, "memory": 35394, "step": 52997}
{"lr": 0.0014590530428580295, "data_time": 0.00203859806060791, "grad_norm": 0.3109633415937424, "loss": 0.4715391516685486, "time": 1.6293039560317992, "epoch": 170, "memory": 35394, "step": 53097}
{"lr": 0.001458468952408137, "data_time": 0.002341914176940918, "grad_norm": 0.3432395398616791, "loss": 0.47024751603603365, "time": 1.6045777797698975, "epoch": 170, "memory": 35394, "step": 53197}
{"lr": 0.0014578076180952126, "data_time": 0.4278813600540161, "grad_norm": 0.28545158803462983, "loss": 0.4749287784099579, "time": 1.5625330209732056, "epoch": 171, "memory": 35394, "step": 53310}
{"lr": 0.0014572212064131952, "data_time": 0.2194120168685913, "grad_norm": 0.2808655545115471, "loss": 0.46978034675121305, "time": 1.6377522706985475, "epoch": 171, "memory": 35394, "step": 53410}
{"lr": 0.001456633706469405, "data_time": 0.0037796497344970703, "grad_norm": 0.29820395857095716, "loss": 0.4692943930625916, "time": 1.6022152662277223, "epoch": 171, "memory": 35394, "step": 53510}
{"lr": 0.0014559685230858344, "data_time": 0.2698643207550049, "grad_norm": 0.28383616656064986, "loss": 0.47374018728733064, "time": 1.5898842811584473, "epoch": 172, "memory": 35394, "step": 53623}
{"lr": 0.0014553787083895606, "data_time": 0.002063441276550293, "grad_norm": 0.2995751708745956, "loss": 0.4736219525337219, "time": 1.55423846244812, "epoch": 172, "memory": 35394, "step": 53723}
{"lr": 0.0014547878084824211, "data_time": 0.0031712532043457033, "grad_norm": 0.32420649826526643, "loss": 0.46626289188861847, "time": 1.3738911867141723, "epoch": 172, "memory": 35394, "step": 53823}
{"lr": 0.001454118786818946, "data_time": 0.2584169626235962, "grad_norm": 0.25881040543317796, "loss": 0.4685127228498459, "time": 1.603907012939453, "epoch": 173, "memory": 35394, "step": 53936}
{"lr": 0.0014535255786765117, "data_time": 0.002315783500671387, "grad_norm": 0.2762638658285141, "loss": 0.4655046910047531, "time": 1.6250131845474243, "epoch": 173, "memory": 35394, "step": 54036}
{"lr": 0.0014529312883917215, "data_time": 0.0035016536712646484, "grad_norm": 0.3053190350532532, "loss": 0.4688581496477127, "time": 1.5600362300872803, "epoch": 173, "memory": 35394, "step": 54136}
{"lr": 0.0014522584393013495, "data_time": 0.2874422073364258, "grad_norm": 0.2978756472468376, "loss": 0.4710650771856308, "time": 1.6710335254669189, "epoch": 174, "memory": 35394, "step": 54249}
{"lr": 0.0014516618473359004, "data_time": 0.0025964975357055664, "grad_norm": 0.29748319238424303, "loss": 0.46659042239189147, "time": 1.6110329627990723, "epoch": 174, "memory": 35394, "step": 54349}
{"lr": 0.001451064176314162, "data_time": 0.0029291391372680666, "grad_norm": 0.3164562672376633, "loss": 0.4681436628103256, "time": 1.6408367872238159, "epoch": 174, "memory": 35394, "step": 54449}
{"lr": 0.0014503875107119903, "data_time": 0.17496986389160157, "grad_norm": 0.3020316809415817, "loss": 0.4737392753362656, "time": 1.6015629768371582, "epoch": 175, "memory": 35394, "step": 54562}
{"lr": 0.0014497875446015638, "data_time": 0.002031087875366211, "grad_norm": 0.2829320028424263, "loss": 0.4682610988616943, "time": 1.578852105140686, "epoch": 175, "memory": 35394, "step": 54662}
{"lr": 0.001449186502538418, "data_time": 0.002847743034362793, "grad_norm": 0.2892738938331604, "loss": 0.47381678521633147, "time": 1.5498024463653564, "epoch": 175, "memory": 35394, "step": 54762}
{"lr": 0.0014485060314014574, "data_time": 0.16363167762756348, "grad_norm": 0.28218656182289126, "loss": 0.46830440759658815, "time": 1.6810015201568604, "epoch": 176, "memory": 35394, "step": 54875}
{"lr": 0.0014479027008788316, "data_time": 0.00220334529876709, "grad_norm": 0.2982559084892273, "loss": 0.4742135554552078, "time": 1.6480063915252685, "epoch": 176, "memory": 35394, "step": 54975}
{"lr": 0.0014472982975245105, "data_time": 0.0037336111068725585, "grad_norm": 0.27118540108203887, "loss": 0.4674179136753082, "time": 1.5772485733032227, "epoch": 176, "memory": 35394, "step": 55075}
{"lr": 0.0014466140318915035, "data_time": 0.5327614307403564, "grad_norm": 0.27598078846931456, "loss": 0.470238932967186, "time": 1.5908808946609496, "epoch": 177, "memory": 35394, "step": 55188}
{"lr": 0.0014460073467440278, "data_time": 0.0022248029708862305, "grad_norm": 0.3165100574493408, "loss": 0.47076371908187864, "time": 1.6150795221328735, "epoch": 177, "memory": 35394, "step": 55288}
{"lr": 0.0014453995919032861, "data_time": 0.003454256057739258, "grad_norm": 0.31444724053144457, "loss": 0.4729907035827637, "time": 1.5501599788665772, "epoch": 177, "memory": 35394, "step": 55388}
{"lr": 0.0014447115428745344, "data_time": 0.23338491916656495, "grad_norm": 0.34854787588119507, "loss": 0.47006424963474275, "time": 1.5342402458190918, "epoch": 178, "memory": 35394, "step": 55501}
{"lr": 0.0014441015129439845, "data_time": 0.0028392553329467775, "grad_norm": 0.2601276978850365, "loss": 0.47614893317222595, "time": 1.6145290374755858, "epoch": 178, "memory": 35394, "step": 55601}
{"lr": 0.0014434904164759472, "data_time": 0.004730057716369629, "grad_norm": 0.27629228979349135, "loss": 0.47069774866104125, "time": 1.5893383741378784, "epoch": 178, "memory": 35394, "step": 55701}
{"lr": 0.0014427985952131226, "data_time": 0.12421751022338867, "grad_norm": 0.36174584925174713, "loss": 0.46768505275249483, "time": 1.5267077445983888, "epoch": 179, "memory": 35394, "step": 55814}
{"lr": 0.0014421852303955272, "data_time": 0.0026221513748168946, "grad_norm": 0.3133387237787247, "loss": 0.4689624786376953, "time": 1.599146795272827, "epoch": 179, "memory": 35394, "step": 55914}
{"lr": 0.0014415708022135281, "data_time": 0.0062283754348754885, "grad_norm": 0.286565825343132, "loss": 0.4702074468135834, "time": 1.5799947261810303, "epoch": 179, "memory": 35394, "step": 56014}
{"lr": 0.0014408752199394999, "data_time": 0.0936781644821167, "grad_norm": 0.3179437130689621, "loss": 0.46492821872234347, "time": 1.6922754764556884, "epoch": 180, "memory": 35394, "step": 56127}
{"lr": 0.001440258530184993, "data_time": 0.003398895263671875, "grad_norm": 0.309399738907814, "loss": 0.46923362612724306, "time": 1.5171862602233888, "epoch": 180, "memory": 35394, "step": 56227}
{"lr": 0.0014396407802564157, "data_time": 0.003466939926147461, "grad_norm": 0.3000945746898651, "loss": 0.4666789710521698, "time": 1.59686222076416, "epoch": 180, "memory": 35394, "step": 56327}
{"lr": 0.0014389414482550635, "data_time": 0.4198293924331665, "grad_norm": 0.301208359003067, "loss": 0.4721845418214798, "time": 1.5670516967773438, "epoch": 181, "memory": 35394, "step": 56440}
{"lr": 0.001438321443567713, "data_time": 0.0029335737228393553, "grad_norm": 0.28934227526187895, "loss": 0.4732853710651398, "time": 1.5850979328155517, "epoch": 181, "memory": 35394, "step": 56540}
{"lr": 0.001437700381913822, "data_time": 0.003963828086853027, "grad_norm": 0.293257175385952, "loss": 0.46927026510238645, "time": 1.579022479057312, "epoch": 181, "memory": 35394, "step": 56640}
{"lr": 0.0014369973115298555, "data_time": 0.21404502391815186, "grad_norm": 0.3002420857548714, "loss": 0.4685739129781723, "time": 1.5752322912216186, "epoch": 182, "memory": 35394, "step": 56753}
{"lr": 0.0014363740019675056, "data_time": 0.002151966094970703, "grad_norm": 0.3131723076105118, "loss": 0.4733766555786133, "time": 1.5863263607025146, "epoch": 182, "memory": 35394, "step": 56853}
{"lr": 0.0014357496386632933, "data_time": 0.0029147863388061523, "grad_norm": 0.2934686213731766, "loss": 0.46085466742515563, "time": 1.5614045143127442, "epoch": 182, "memory": 35394, "step": 56953}
{"lr": 0.0014350428413020641, "data_time": 0.4029378890991211, "grad_norm": 0.3074746787548065, "loss": 0.4652681887149811, "time": 1.5961194515228272, "epoch": 183, "memory": 35394, "step": 57066}
{"lr": 0.0014344162369761724, "data_time": 0.0022130966186523437, "grad_norm": 0.319979290664196, "loss": 0.4741803497076035, "time": 1.6120948314666748, "epoch": 183, "memory": 35394, "step": 57166}
{"lr": 0.001433788582150189, "data_time": 0.0027157068252563477, "grad_norm": 0.2657346561551094, "loss": 0.47120038568973543, "time": 1.6111570358276368, "epoch": 183, "memory": 35394, "step": 57266}
{"lr": 0.0014330780692775127, "data_time": 0.3469107151031494, "grad_norm": 0.32320293188095095, "loss": 0.47329621613025663, "time": 1.5842166185379027, "epoch": 184, "memory": 35394, "step": 57379}
{"lr": 0.0014324481803529854, "data_time": 0.0025513172149658203, "grad_norm": 0.2774762958288193, "loss": 0.4724943101406097, "time": 1.6055054187774658, "epoch": 184, "memory": 35394, "step": 57479}
{"lr": 0.0014318172441871784, "data_time": 0.0030506610870361327, "grad_norm": 0.332763808965683, "loss": 0.47426392138004303, "time": 1.5911387920379638, "epoch": 184, "memory": 35394, "step": 57579}
{"lr": 0.0014311030273291433, "data_time": 0.2924496173858643, "grad_norm": 0.2631110042333603, "loss": 0.4737377554178238, "time": 1.5873345851898193, "epoch": 185, "memory": 35394, "step": 57692}
{"lr": 0.0014304698640241688, "data_time": 0.002006268501281738, "grad_norm": 0.35392642617225645, "loss": 0.4733426034450531, "time": 1.556911253929138, "epoch": 185, "memory": 35394, "step": 57792}
{"lr": 0.0014298356567537173, "data_time": 0.0022449493408203125, "grad_norm": 0.3138518825173378, "loss": 0.4697775930166245, "time": 1.4571951627731323, "epoch": 185, "memory": 35394, "step": 57892}
{"lr": 0.0014291177474964954, "data_time": 0.30384483337402346, "grad_norm": 0.32021559625864027, "loss": 0.46878900527954104, "time": 1.6219077348709106, "epoch": 186, "memory": 35394, "step": 58005}
{"lr": 0.0014284813200823798, "data_time": 0.0021825790405273437, "grad_norm": 0.278409406542778, "loss": 0.46759899854660036, "time": 1.6315452575683593, "epoch": 186, "memory": 35394, "step": 58105}
{"lr": 0.0014278438519955234, "data_time": 0.0026964902877807616, "grad_norm": 0.32632515728473666, "loss": 0.4722230076789856, "time": 1.5680049896240233, "epoch": 186, "memory": 35394, "step": 58205}
{"lr": 0.001427122261985189, "data_time": 0.29119601249694826, "grad_norm": 0.3111144363880157, "loss": 0.47364978194236756, "time": 1.6600635766983032, "epoch": 187, "memory": 35394, "step": 58318}
{"lr": 0.0014264825807861912, "data_time": 0.0023880720138549803, "grad_norm": 0.31201342642307284, "loss": 0.4716892570257187, "time": 1.5811436653137207, "epoch": 187, "memory": 35394, "step": 58418}
{"lr": 0.0014258418622240705, "data_time": 0.0029753923416137697, "grad_norm": 0.3095570519566536, "loss": 0.46616479754447937, "time": 1.6050919055938722, "epoch": 187, "memory": 35394, "step": 58518}
{"lr": 0.0014251166031664056, "data_time": 0.07426257133483886, "grad_norm": 0.30708187520504, "loss": 0.4670454740524292, "time": 1.6614079236984254, "epoch": 188, "memory": 35394, "step": 58631}
{"lr": 0.0014244736785595654, "data_time": 0.0022929906845092773, "grad_norm": 0.3182269066572189, "loss": 0.4683115690946579, "time": 2.282057785987854, "epoch": 188, "memory": 35394, "step": 58731}
{"lr": 0.0014238297199160505, "data_time": 0.0030943632125854494, "grad_norm": 0.28474934250116346, "loss": 0.47630409300327303, "time": 1.5549591541290284, "epoch": 188, "memory": 35394, "step": 58831}
{"lr": 0.001423100803576357, "data_time": 0.45864837169647216, "grad_norm": 0.28228756636381147, "loss": 0.46735852062702177, "time": 1.6342482328414918, "epoch": 189, "memory": 35394, "step": 58944}
{"lr": 0.0014224546459913313, "data_time": 0.002034497261047363, "grad_norm": 0.32028891146183014, "loss": 0.46726868152618406, "time": 1.5758291244506837, "epoch": 189, "memory": 35394, "step": 59044}
{"lr": 0.001421807457712852, "data_time": 0.0034200191497802735, "grad_norm": 0.30193726867437365, "loss": 0.4763247847557068, "time": 1.514237141609192, "epoch": 189, "memory": 35394, "step": 59144}
{"lr": 0.0014210748959157641, "data_time": 0.24640169143676757, "grad_norm": 0.3181789666414261, "loss": 0.47425582706928254, "time": 1.6561965465545654, "epoch": 190, "memory": 35394, "step": 59257}
{"lr": 0.0014204255158346553, "data_time": 0.0022960662841796874, "grad_norm": 0.3129463344812393, "loss": 0.46886166632175447, "time": 1.6395469188690186, "epoch": 190, "memory": 35394, "step": 59357}
{"lr": 0.0014197751084200334, "data_time": 0.002558588981628418, "grad_norm": 0.3266394555568695, "loss": 0.4651120901107788, "time": 1.605110502243042, "epoch": 190, "memory": 35394, "step": 59457}
{"lr": 0.0014190389130493192, "data_time": 0.27958357334136963, "grad_norm": 0.29088290184736254, "loss": 0.4646313667297363, "time": 1.6675094604492187, "epoch": 191, "memory": 35394, "step": 59570}
{"lr": 0.0014183863210065054, "data_time": 0.0021049976348876953, "grad_norm": 0.3474822103977203, "loss": 0.47486760318279264, "time": 1.6789525032043457, "epoch": 191, "memory": 35394, "step": 59670}
{"lr": 0.001417732705006786, "data_time": 0.002956652641296387, "grad_norm": 0.3031906604766846, "loss": 0.4728790134191513, "time": 1.8052198886871338, "epoch": 191, "memory": 35394, "step": 59770}
{"lr": 0.0014169928880051574, "data_time": 0.3092926502227783, "grad_norm": 0.26986821442842485, "loss": 0.4689634680747986, "time": 1.8279314517974854, "epoch": 192, "memory": 35394, "step": 59883}
{"lr": 0.0014163370945871234, "data_time": 0.002117300033569336, "grad_norm": 0.35809345841407775, "loss": 0.4633689522743225, "time": 1.753304648399353, "epoch": 192, "memory": 35394, "step": 59983}
{"lr": 0.0014156802806054001, "data_time": 0.002707672119140625, "grad_norm": 0.29332645386457445, "loss": 0.47241308391094206, "time": 1.7095742225646973, "epoch": 192, "memory": 35394, "step": 60083}
{"lr": 0.0014149368539743263, "data_time": 0.001876974105834961, "grad_norm": 0.32788383066654203, "loss": 0.4673388868570328, "time": 1.6230338096618653, "epoch": 193, "memory": 35394, "step": 60196}
{"lr": 0.001414277869819487, "data_time": 0.002238821983337402, "grad_norm": 0.3523511916399002, "loss": 0.4761125177145004, "time": 1.696906042098999, "epoch": 193, "memory": 35394, "step": 60296}
{"lr": 0.0014136178685107326, "data_time": 0.002200603485107422, "grad_norm": 0.2639012783765793, "loss": 0.47108382284641265, "time": 1.616952610015869, "epoch": 193, "memory": 35394, "step": 60396}
{"lr": 0.0014128708443102336, "data_time": 0.3051146984100342, "grad_norm": 0.3452139988541603, "loss": 0.46495519280433656, "time": 1.7012290477752685, "epoch": 194, "memory": 35394, "step": 60509}
{"lr": 0.0014122086801087689, "data_time": 0.001993131637573242, "grad_norm": 0.32682437002658843, "loss": 0.46214122176170347, "time": 1.5826884031295776, "epoch": 194, "memory": 35394, "step": 60609}
{"lr": 0.0014115455021796617, "data_time": 0.0025884628295898436, "grad_norm": 0.35147075951099394, "loss": 0.4695694923400879, "time": 1.6295403718948365, "epoch": 194, "memory": 35394, "step": 60709}
{"lr": 0.001410794892528118, "data_time": 0.0018650293350219727, "grad_norm": 0.2854113057255745, "loss": 0.4676926791667938, "time": 1.7151899337768555, "epoch": 195, "memory": 35394, "step": 60822}
{"lr": 0.0014101295590217902, "data_time": 0.0018543243408203126, "grad_norm": 0.2882421463727951, "loss": 0.45659250020980835, "time": 1.8125338077545166, "epoch": 195, "memory": 35394, "step": 60922}
{"lr": 0.0014094632152305425, "data_time": 0.00273284912109375, "grad_norm": 0.32339481115341184, "loss": 0.4707329511642456, "time": 1.6041362524032592, "epoch": 195, "memory": 35394, "step": 61022}
{"lr": 0.0014087090323045008, "data_time": 0.12534875869750978, "grad_norm": 0.253033821284771, "loss": 0.46790191531181335, "time": 1.695911717414856, "epoch": 196, "memory": 35394, "step": 61135}
{"lr": 0.0014080405402864908, "data_time": 0.003158426284790039, "grad_norm": 0.3074509233236313, "loss": 0.4644608527421951, "time": 1.6996809482574462, "epoch": 196, "memory": 35394, "step": 61235}
{"lr": 0.0014073710414426676, "data_time": 0.0039783954620361325, "grad_norm": 0.31372160762548446, "loss": 0.4706553131341934, "time": 2.20734121799469, "epoch": 196, "memory": 35394, "step": 61335}
{"lr": 0.0014066132974766379, "data_time": 0.5167878866195679, "grad_norm": 0.31866446137428284, "loss": 0.4713194131851196, "time": 1.7400450468063355, "epoch": 197, "memory": 35394, "step": 61448}
{"lr": 0.0014059416577913602, "data_time": 0.296636962890625, "grad_norm": 0.3023708999156952, "loss": 0.4662445574998856, "time": 1.6764413595199585, "epoch": 197, "memory": 35394, "step": 61548}
{"lr": 0.0014052690147557101, "data_time": 0.07198173999786377, "grad_norm": 0.32717117965221404, "loss": 0.46478070318698883, "time": 1.7123879671096802, "epoch": 197, "memory": 35394, "step": 61648}
{"lr": 0.0014045077220419752, "data_time": 0.41347978115081785, "grad_norm": 0.29966471940279005, "loss": 0.4765000373125076, "time": 1.673772120475769, "epoch": 198, "memory": 35394, "step": 61761}
{"lr": 0.00140383294558491, "data_time": 0.0028383731842041016, "grad_norm": 0.348673552274704, "loss": 0.4723760575056076, "time": 1.742823100090027, "epoch": 198, "memory": 35394, "step": 61861}
{"lr": 0.0014031571692691908, "data_time": 0.003871726989746094, "grad_norm": 0.27676914930343627, "loss": 0.47189496755599974, "time": 1.5643080472946167, "epoch": 198, "memory": 35394, "step": 61961}
{"lr": 0.001402392340157597, "data_time": 0.49094879627227783, "grad_norm": 0.3166297242045403, "loss": 0.46677222549915315, "time": 1.6645609140396118, "epoch": 199, "memory": 35394, "step": 62074}
{"lr": 0.0014017144378751067, "data_time": 0.0021778345108032227, "grad_norm": 0.3432962611317635, "loss": 0.4702152132987976, "time": 1.6718613624572753, "epoch": 199, "memory": 35394, "step": 62174}
{"lr": 0.0014010355392418995, "data_time": 0.002832794189453125, "grad_norm": 0.42107337713241577, "loss": 0.467466327548027, "time": 1.6448354959487914, "epoch": 199, "memory": 35394, "step": 62274}
{"lr": 0.0014002671861396618, "data_time": 0.20383217334747314, "grad_norm": 0.2863741531968117, "loss": 0.4710086613893509, "time": 1.6694660186767578, "epoch": 200, "memory": 35394, "step": 62387}
{"lr": 0.0013995861690288172, "data_time": 0.0019563913345336916, "grad_norm": 0.3309642896056175, "loss": 0.4673764258623123, "time": 1.6877433776855468, "epoch": 200, "memory": 35394, "step": 62487}
{"lr": 0.001398904159091357, "data_time": 0.002680683135986328, "grad_norm": 0.2888010397553444, "loss": 0.4665868192911148, "time": 1.680584478378296, "epoch": 200, "memory": 35394, "step": 62587}
{"lr": 0.0013981322944628628, "data_time": 0.2186347723007202, "grad_norm": 0.28719694167375565, "loss": 0.46434280276298523, "time": 1.755620574951172, "epoch": 201, "memory": 35394, "step": 62700}
{"lr": 0.0013974481735712656, "data_time": 0.002644848823547363, "grad_norm": 0.32963127493858335, "loss": 0.4668836653232574, "time": 1.714164638519287, "epoch": 201, "memory": 35394, "step": 62800}
{"lr": 0.001396763063393259, "data_time": 0.003105497360229492, "grad_norm": 0.3395594388246536, "loss": 0.4647400975227356, "time": 1.6350791215896607, "epoch": 201, "memory": 35394, "step": 62900}
{"lr": 0.0013959876997598547, "data_time": 0.22502262592315675, "grad_norm": 0.3695346564054489, "loss": 0.46441820859909055, "time": 1.7366650581359864, "epoch": 202, "memory": 35394, "step": 63013}
{"lr": 0.0013953004861854562, "data_time": 0.0021934747695922852, "grad_norm": 0.3458415284752846, "loss": 0.4737526535987854, "time": 1.7023239850997924, "epoch": 202, "memory": 35394, "step": 63113}
{"lr": 0.0013946122868809033, "data_time": 0.003093099594116211, "grad_norm": 0.3411865681409836, "loss": 0.46702808141708374, "time": 1.6514294385910033, "epoch": 202, "memory": 35394, "step": 63213}
{"lr": 0.0013938334368206985, "data_time": 0.2790762186050415, "grad_norm": 0.2994870156049728, "loss": 0.469181427359581, "time": 1.6718265771865846, "epoch": 203, "memory": 35394, "step": 63326}
{"lr": 0.0013931431417116217, "data_time": 0.002037644386291504, "grad_norm": 0.32887806594371793, "loss": 0.4600908696651459, "time": 1.6471970081329346, "epoch": 203, "memory": 35394, "step": 63426}
{"lr": 0.0013924518644446344, "data_time": 0.00296478271484375, "grad_norm": 0.31616681069135666, "loss": 0.46913267970085143, "time": 2.47383074760437, "epoch": 203, "memory": 35394, "step": 63526}
{"lr": 0.0013916695405922967, "data_time": 0.5410053730010986, "grad_norm": 0.3267045825719833, "loss": 0.46192456483840943, "time": 1.7564292430877686, "epoch": 204, "memory": 35394, "step": 63639}
{"lr": 0.0013909761751466486, "data_time": 0.0028777360916137696, "grad_norm": 0.2967778146266937, "loss": 0.47468116581439973, "time": 1.723039412498474, "epoch": 204, "memory": 35394, "step": 63739}
{"lr": 0.0013902818311312707, "data_time": 0.003860664367675781, "grad_norm": 0.3171767398715019, "loss": 0.47028414607048036, "time": 1.6971614122390748, "epoch": 204, "memory": 35394, "step": 63839}
{"lr": 0.0013894960461778181, "data_time": 0.41835479736328124, "grad_norm": 0.2926951736211777, "loss": 0.4686872035264969, "time": 1.6770302534103394, "epoch": 205, "memory": 35394, "step": 63952}
{"lr": 0.0013887996216435177, "data_time": 0.002800464630126953, "grad_norm": 0.30365120023489, "loss": 0.468902051448822, "time": 1.6939208269119264, "epoch": 205, "memory": 35394, "step": 64052}
{"lr": 0.0013881022221435403, "data_time": 0.0029721736907958986, "grad_norm": 0.3129457712173462, "loss": 0.4634545147418976, "time": 2.3847360134124758, "epoch": 205, "memory": 35394, "step": 64152}
{"lr": 0.0013873129888361427, "data_time": 0.48239998817443847, "grad_norm": 0.3587556630373001, "loss": 0.4666542649269104, "time": 1.7186654329299926, "epoch": 206, "memory": 35394, "step": 64265}
{"lr": 0.001386613516510735, "data_time": 0.16566617488861085, "grad_norm": 0.3348051249980927, "loss": 0.4615588128566742, "time": 1.7271795272827148, "epoch": 206, "memory": 35394, "step": 64365}
{"lr": 0.001385913072839518, "data_time": 0.0023594141006469727, "grad_norm": 0.2904904246330261, "loss": 0.4727581202983856, "time": 1.941850209236145, "epoch": 206, "memory": 35394, "step": 64465}
{"lr": 0.0013851204039812795, "data_time": 0.2431182861328125, "grad_norm": 0.2788445085287094, "loss": 0.4605056166648865, "time": 1.6292827129364014, "epoch": 207, "memory": 35394, "step": 64578}
{"lr": 0.0013844178952117478, "data_time": 0.0025800228118896484, "grad_norm": 0.26874462962150575, "loss": 0.46864113211631775, "time": 1.387146258354187, "epoch": 207, "memory": 35394, "step": 64678}
{"lr": 0.0013837144187320342, "data_time": 0.0032301425933837892, "grad_norm": 0.33183265179395677, "loss": 0.4628928959369659, "time": 2.219890069961548, "epoch": 207, "memory": 35394, "step": 64778}
{"lr": 0.0013829183271817946, "data_time": 0.28136119842529295, "grad_norm": 0.32084494084119797, "loss": 0.4651089072227478, "time": 1.5939666509628296, "epoch": 208, "memory": 35394, "step": 64891}
{"lr": 0.0013822127933643834, "data_time": 0.0025270938873291015, "grad_norm": 0.338676917552948, "loss": 0.46944151520729066, "time": 1.6210711002349854, "epoch": 208, "memory": 35394, "step": 64991}
{"lr": 0.001381506295488115, "data_time": 0.0034482479095458984, "grad_norm": 0.3167917937040329, "loss": 0.47350145876407623, "time": 1.576575803756714, "epoch": 208, "memory": 35394, "step": 65091}
{"lr": 0.0013807067941602363, "data_time": 0.41229188442230225, "grad_norm": 0.33148914873600005, "loss": 0.47402839064598085, "time": 1.5951038122177124, "epoch": 209, "memory": 35394, "step": 65204}
{"lr": 0.0013799982467402605, "data_time": 0.00300290584564209, "grad_norm": 0.3279234364628792, "loss": 0.4690255492925644, "time": 1.5857105731964112, "epoch": 209, "memory": 35394, "step": 65304}
{"lr": 0.0013792887389283916, "data_time": 0.003412628173828125, "grad_norm": 0.38516962230205537, "loss": 0.4658779174089432, "time": 1.6279579401016235, "epoch": 209, "memory": 35394, "step": 65404}
{"lr": 0.00137848584079255, "data_time": 0.3384826183319092, "grad_norm": 0.3273053914308548, "loss": 0.46494150459766387, "time": 1.5386008977890016, "epoch": 210, "memory": 35394, "step": 65517}
{"lr": 0.0013777742912642148, "data_time": 0.0022003173828125, "grad_norm": 0.32113461196422577, "loss": 0.4704344868659973, "time": 1.5698360443115233, "epoch": 210, "memory": 35394, "step": 65617}
{"lr": 0.0013770617850265292, "data_time": 0.002754974365234375, "grad_norm": 0.30732946246862414, "loss": 0.4663552284240723, "time": 1.5353568553924561, "epoch": 210, "memory": 35394, "step": 65717}
{"lr": 0.0013762555031075043, "data_time": 0.4402694463729858, "grad_norm": 0.3124862268567085, "loss": 0.4653278201818466, "time": 1.5268751382827759, "epoch": 211, "memory": 35394, "step": 65830}
{"lr": 0.001375540963013712, "data_time": 0.0019316434860229491, "grad_norm": 0.2851670444011688, "loss": 0.46854456663131716, "time": 1.5997052907943725, "epoch": 211, "memory": 35394, "step": 65930}
{"lr": 0.0013748254699086323, "data_time": 0.0029338836669921876, "grad_norm": 0.3307501494884491, "loss": 0.47022207677364347, "time": 1.5767280817031861, "epoch": 211, "memory": 35394, "step": 66030}
{"lr": 0.0013740158172860996, "data_time": 0.25089690685272215, "grad_norm": 0.3186151996254921, "loss": 0.4675956815481186, "time": 1.595865535736084, "epoch": 212, "memory": 35394, "step": 66143}
{"lr": 0.001373298298218268, "data_time": 0.0030724048614501954, "grad_norm": 0.30045551508665086, "loss": 0.4757577031850815, "time": 1.5971253633499145, "epoch": 212, "memory": 35394, "step": 66243}
{"lr": 0.0013725798298526736, "data_time": 0.0029916286468505858, "grad_norm": 0.3421851500868797, "loss": 0.47383575737476347, "time": 1.5623473405838013, "epoch": 212, "memory": 35394, "step": 66343}
{"lr": 0.001371766819660982, "data_time": 0.37029101848602297, "grad_norm": 0.3258655309677124, "loss": 0.4642497807741165, "time": 1.6059899091720582, "epoch": 213, "memory": 35394, "step": 66456}
{"lr": 0.001371046333258855, "data_time": 0.0029728174209594726, "grad_norm": 0.3437332421541214, "loss": 0.4666615217924118, "time": 1.6498097419738769, "epoch": 213, "memory": 35394, "step": 66556}
{"lr": 0.0013703249012878887, "data_time": 0.002724003791809082, "grad_norm": 0.3242976129055023, "loss": 0.46891663074493406, "time": 1.5578476190567017, "epoch": 213, "memory": 35394, "step": 66656}
{"lr": 0.0013695085467158605, "data_time": 0.4845156669616699, "grad_norm": 0.30233625173568723, "loss": 0.4674726963043213, "time": 1.5978281021118164, "epoch": 214, "memory": 35394, "step": 66769}
{"lr": 0.0013687851046673193, "data_time": 0.0030577898025512694, "grad_norm": 0.30258279144763944, "loss": 0.4746174097061157, "time": 1.634990644454956, "epoch": 214, "memory": 35394, "step": 66869}
{"lr": 0.0013680607207941988, "data_time": 0.0032387256622314455, "grad_norm": 0.3024058148264885, "loss": 0.46580421924591064, "time": 1.5673542737960815, "epoch": 214, "memory": 35394, "step": 66969}
{"lr": 0.0013672410350849063, "data_time": 0.2918269634246826, "grad_norm": 0.2855265200138092, "loss": 0.46471414864063265, "time": 1.5618194341659546, "epoch": 215, "memory": 35394, "step": 67082}
{"lr": 0.0013665146491257762, "data_time": 0.0021042585372924804, "grad_norm": 0.34018137902021406, "loss": 0.467884761095047, "time": 1.5780376672744751, "epoch": 215, "memory": 35394, "step": 67182}
{"lr": 0.0013657873251016078, "data_time": 0.002847433090209961, "grad_norm": 0.31072817742824554, "loss": 0.46707758605480193, "time": 1.6363652229309082, "epoch": 215, "memory": 35394, "step": 67282}
{"lr": 0.001364964321552164, "data_time": 0.28907074928283694, "grad_norm": 0.2976585805416107, "loss": 0.470560359954834, "time": 1.4349777698516846, "epoch": 216, "memory": 35394, "step": 67395}
{"lr": 0.001364235003466028, "data_time": 0.002001309394836426, "grad_norm": 0.3345088645815849, "loss": 0.4629779815673828, "time": 1.5832389831542968, "epoch": 216, "memory": 35394, "step": 67495}
{"lr": 0.0013635047510896143, "data_time": 0.0028343677520751955, "grad_norm": 0.3455568477511406, "loss": 0.4689526528120041, "time": 1.509415364265442, "epoch": 216, "memory": 35394, "step": 67595}
{"lr": 0.0013626784430509538, "data_time": 0.09550259113311768, "grad_norm": 0.30089477449655533, "loss": 0.46715850830078126, "time": 1.5824538707733153, "epoch": 217, "memory": 35394, "step": 67708}
{"lr": 0.0013619462046689625, "data_time": 0.0020072221755981444, "grad_norm": 0.3204872354865074, "loss": 0.4647522121667862, "time": 1.5395055770874024, "epoch": 217, "memory": 35394, "step": 67808}
{"lr": 0.0013612130357866074, "data_time": 0.0033783674240112304, "grad_norm": 0.31375990957021715, "loss": 0.46721204519271853, "time": 1.5612772703170776, "epoch": 217, "memory": 35394, "step": 67908}
{"lr": 0.0013603834366632684, "data_time": 0.3160696983337402, "grad_norm": 0.31197263598442077, "loss": 0.4731456577777863, "time": 1.6191839456558228, "epoch": 218, "memory": 35394, "step": 68021}
{"lr": 0.0013596482898639455, "data_time": 0.002175164222717285, "grad_norm": 0.30176934599876404, "loss": 0.4725663274526596, "time": 1.52548348903656, "epoch": 218, "memory": 35394, "step": 68121}
{"lr": 0.0013589122163692677, "data_time": 0.002683091163635254, "grad_norm": 0.32255970388650895, "loss": 0.47066466212272645, "time": 1.5282211542129516, "epoch": 218, "memory": 35394, "step": 68221}
{"lr": 0.0013580793396191814, "data_time": 0.3943022727966309, "grad_norm": 0.3046416640281677, "loss": 0.4702676832675934, "time": 1.5704753398895264, "epoch": 219, "memory": 35394, "step": 68334}
{"lr": 0.0013573412963282293, "data_time": 0.0022294998168945314, "grad_norm": 0.30341685116291045, "loss": 0.4624994218349457, "time": 1.5974154949188233, "epoch": 219, "memory": 35394, "step": 68434}
{"lr": 0.0013566023301619656, "data_time": 0.002793240547180176, "grad_norm": 0.2916345074772835, "loss": 0.46505579352378845, "time": 1.5455355405807496, "epoch": 219, "memory": 35394, "step": 68534}
{"lr": 0.0013557661892962302, "data_time": 0.25853805541992186, "grad_norm": 0.34701276272535325, "loss": 0.46456713080406187, "time": 1.5395676374435425, "epoch": 220, "memory": 35394, "step": 68647}
{"lr": 0.0013550252614863414, "data_time": 0.00221867561340332, "grad_norm": 0.3609860211610794, "loss": 0.4683104813098907, "time": 1.445785403251648, "epoch": 220, "memory": 35394, "step": 68747}
{"lr": 0.0013542834146361537, "data_time": 0.002998018264770508, "grad_norm": 0.3373274177312851, "loss": 0.4705688625574112, "time": 1.5266978740692139, "epoch": 220, "memory": 35394, "step": 68847}
{"lr": 0.0013534440232188204, "data_time": 0.3528109312057495, "grad_norm": 0.3383351445198059, "loss": 0.4690992712974548, "time": 1.551564884185791, "epoch": 221, "memory": 35394, "step": 68960}
{"lr": 0.0013527002229094796, "data_time": 0.0675506353378296, "grad_norm": 0.32155780792236327, "loss": 0.46686349511146547, "time": 1.5239441394805908, "epoch": 221, "memory": 35394, "step": 69060}
{"lr": 0.001351955507409762, "data_time": 0.0029624462127685546, "grad_norm": 0.32031760513782503, "loss": 0.47266162633895875, "time": 1.5331403017044067, "epoch": 221, "memory": 35394, "step": 69160}
{"lr": 0.0013511128790576132, "data_time": 0.5016392707824707, "grad_norm": 0.3345794051885605, "loss": 0.47031248211860655, "time": 1.554910945892334, "epoch": 222, "memory": 35394, "step": 69273}
{"lr": 0.0013503662183149026, "data_time": 0.2159792184829712, "grad_norm": 0.335563400387764, "loss": 0.46505349278450014, "time": 1.5521260976791382, "epoch": 222, "memory": 35394, "step": 69373}
{"lr": 0.0013496186462465842, "data_time": 0.0034702301025390627, "grad_norm": 0.3789727628231049, "loss": 0.4676647275686264, "time": 1.5342478990554809, "epoch": 222, "memory": 35394, "step": 69473}
{"lr": 0.001348772794628911, "data_time": 0.41562695503234864, "grad_norm": 0.36033526957035067, "loss": 0.4643421858549118, "time": 1.5213031768798828, "epoch": 223, "memory": 35394, "step": 69586}
{"lr": 0.0013480232855653156, "data_time": 0.16674578189849854, "grad_norm": 0.2850770205259323, "loss": 0.4647646307945251, "time": 1.575451421737671, "epoch": 223, "memory": 35394, "step": 69686}
{"lr": 0.0013472728690556656, "data_time": 0.0037891387939453123, "grad_norm": 0.3275949537754059, "loss": 0.46719394624233246, "time": 1.5155475616455079, "epoch": 223, "memory": 35394, "step": 69786}
{"lr": 0.0013464238078940492, "data_time": 0.48112399578094484, "grad_norm": 0.297119514644146, "loss": 0.47006161510944366, "time": 1.4864182949066163, "epoch": 224, "memory": 35394, "step": 69899}
{"lr": 0.0013456714626682625, "data_time": 0.31670632362365725, "grad_norm": 0.3010851204395294, "loss": 0.4742742508649826, "time": 1.5107378244400025, "epoch": 224, "memory": 35394, "step": 69999}
{"lr": 0.0013449182138906928, "data_time": 0.002677726745605469, "grad_norm": 0.32724185287952423, "loss": 0.4642399877309799, "time": 1.4891412019729615, "epoch": 224, "memory": 35394, "step": 70099}
{"lr": 0.0013440659569587776, "data_time": 0.31714591979980467, "grad_norm": 0.28444700688123703, "loss": 0.46981939673423767, "time": 1.500362014770508, "epoch": 225, "memory": 35394, "step": 70212}
{"lr": 0.0013433107877754986, "data_time": 0.23262498378753663, "grad_norm": 0.35317879617214204, "loss": 0.4650521606206894, "time": 1.5008300304412843, "epoch": 225, "memory": 35394, "step": 70312}
{"lr": 0.0013425547189493687, "data_time": 0.0026121616363525392, "grad_norm": 0.30019655227661135, "loss": 0.4639038354158401, "time": 1.5033555507659913, "epoch": 225, "memory": 35394, "step": 70412}
{"lr": 0.0013416992800726467, "data_time": 0.38537213802337644, "grad_norm": 0.285541296005249, "loss": 0.46628267765045167, "time": 1.491797137260437, "epoch": 226, "memory": 35394, "step": 70525}
{"lr": 0.0013409412991823872, "data_time": 0.002702784538269043, "grad_norm": 0.30302584171295166, "loss": 0.47060396075248717, "time": 1.4938310384750366, "epoch": 226, "memory": 35394, "step": 70625}
{"lr": 0.0013401824225728017, "data_time": 0.002935481071472168, "grad_norm": 0.3271701246500015, "loss": 0.4679335355758667, "time": 1.5504031658172608, "epoch": 226, "memory": 35394, "step": 70725}
{"lr": 0.0013393238156283788, "data_time": 0.2763020992279053, "grad_norm": 0.32049059569835664, "loss": 0.4669047802686691, "time": 1.571307325363159, "epoch": 227, "memory": 35394, "step": 70838}
{"lr": 0.0013385630353272599, "data_time": 0.0024616479873657226, "grad_norm": 0.32751513123512266, "loss": 0.4619527101516724, "time": 1.532142162322998, "epoch": 227, "memory": 35394, "step": 70938}
{"lr": 0.0013378013632448717, "data_time": 0.002361297607421875, "grad_norm": 0.2840628445148468, "loss": 0.46951775550842284, "time": 1.4627920150756837, "epoch": 227, "memory": 35394, "step": 71038}
{"lr": 0.001336939602161246, "data_time": 0.41337711811065675, "grad_norm": 0.2998193487524986, "loss": 0.46957763433456423, "time": 1.4947049856185912, "epoch": 228, "memory": 35394, "step": 71151}
{"lr": 0.0013361760347908066, "data_time": 0.24963884353637694, "grad_norm": 0.30736335664987563, "loss": 0.46719238758087156, "time": 1.516456937789917, "epoch": 228, "memory": 35394, "step": 71251}
{"lr": 0.001335411579591618, "data_time": 0.002641773223876953, "grad_norm": 0.31072657406330106, "loss": 0.4631932288408279, "time": 1.4839894771575928, "epoch": 228, "memory": 35394, "step": 71351}
{"lr": 0.0013345466783484569, "data_time": 0.3703241109848022, "grad_norm": 0.3070566087961197, "loss": 0.4729862749576569, "time": 1.4940679311752318, "epoch": 229, "memory": 35394, "step": 71464}
{"lr": 0.0013337803362954437, "data_time": 0.0020418643951416017, "grad_norm": 0.2966490387916565, "loss": 0.46514832973480225, "time": 1.5072141408920288, "epoch": 229, "memory": 35394, "step": 71564}
{"lr": 0.0013330131103806073, "data_time": 0.003056168556213379, "grad_norm": 0.3195452272891998, "loss": 0.46628408432006835, "time": 1.5230916261672973, "epoch": 229, "memory": 35394, "step": 71664}
{"lr": 0.0013321450830085205, "data_time": 0.32437145709991455, "grad_norm": 0.3349012047052383, "loss": 0.4706005185842514, "time": 1.5656681537628174, "epoch": 230, "memory": 35394, "step": 71777}
{"lr": 0.0013313759787046938, "data_time": 0.1346005916595459, "grad_norm": 0.3088129639625549, "loss": 0.46803211271762846, "time": 1.6015883922576903, "epoch": 230, "memory": 35394, "step": 71877}
{"lr": 0.001330605994520305, "data_time": 0.004335093498229981, "grad_norm": 0.3286552518606186, "loss": 0.46912810802459715, "time": 1.536870527267456, "epoch": 230, "memory": 35394, "step": 71977}
{"lr": 0.00132973485510061, "data_time": 0.48975911140441897, "grad_norm": 0.30254857540130614, "loss": 0.46980352997779845, "time": 1.5644702911376953, "epoch": 231, "memory": 35394, "step": 72090}
{"lr": 0.0013289630010225413, "data_time": 0.0020884275436401367, "grad_norm": 0.36432682275772094, "loss": 0.4682702571153641, "time": 1.2556330204010009, "epoch": 231, "memory": 35394, "step": 72190}
{"lr": 0.0013281902710594459, "data_time": 0.01369931697845459, "grad_norm": 0.3245795413851738, "loss": 0.45928249359130857, "time": 1.5177967309951783, "epoch": 231, "memory": 35394, "step": 72290}
{"lr": 0.0013273160337239495, "data_time": 0.22753801345825195, "grad_norm": 0.3600651815533638, "loss": 0.46808559000492095, "time": 1.5070949077606202, "epoch": 232, "memory": 35394, "step": 72403}
{"lr": 0.0013265414423928143, "data_time": 0.0027264833450317385, "grad_norm": 0.3456199109554291, "loss": 0.4708889424800873, "time": 1.4944148063659668, "epoch": 232, "memory": 35394, "step": 72503}
{"lr": 0.001325765979186399, "data_time": 0.002825331687927246, "grad_norm": 0.31385635435581205, "loss": 0.46679621934890747, "time": 1.4866097927093507, "epoch": 232, "memory": 35394, "step": 72603}
{"lr": 0.0013248886581171601, "data_time": 0.2828508853912354, "grad_norm": 0.32176110297441485, "loss": 0.4686097055673599, "time": 1.5332136154174805, "epoch": 233, "memory": 35394, "step": 72716}
{"lr": 0.001324111342098537, "data_time": 0.0028946399688720703, "grad_norm": 0.2975237563252449, "loss": 0.4751867949962616, "time": 1.4751131534576416, "epoch": 233, "memory": 35394, "step": 72816}
{"lr": 0.0013233331582285246, "data_time": 0.0038071393966674803, "grad_norm": 0.28072328120470047, "loss": 0.4628319501876831, "time": 1.4887280941009522, "epoch": 233, "memory": 35394, "step": 72916}
{"lr": 0.0013224527676576271, "data_time": 0.4351125955581665, "grad_norm": 0.32223635315895083, "loss": 0.4651317596435547, "time": 1.4910497665405273, "epoch": 234, "memory": 35394, "step": 73029}
{"lr": 0.0013216727395612997, "data_time": 0.0032270669937133787, "grad_norm": 0.30918710231781005, "loss": 0.47241597771644595, "time": 1.6138617277145386, "epoch": 234, "memory": 35394, "step": 73129}
{"lr": 0.0013208918476515512, "data_time": 0.0040731668472290036, "grad_norm": 0.35755679458379747, "loss": 0.46237051486968994, "time": 1.481725263595581, "epoch": 234, "memory": 35394, "step": 73229}
{"lr": 0.001320008401860874, "data_time": 0.534476900100708, "grad_norm": 0.31985688358545306, "loss": 0.469868603348732, "time": 1.4969049453735352, "epoch": 235, "memory": 35394, "step": 73342}
{"lr": 0.0013192256743406235, "data_time": 0.2628080129623413, "grad_norm": 0.29313919842243197, "loss": 0.4651444911956787, "time": 1.5134499549865723, "epoch": 235, "memory": 35394, "step": 73442}
{"lr": 0.001318442087058929, "data_time": 0.0034357309341430664, "grad_norm": 0.3107015684247017, "loss": 0.46479752361774446, "time": 1.4493027448654174, "epoch": 235, "memory": 35394, "step": 73542}
{"lr": 0.0013175556003799144, "data_time": 0.34846928119659426, "grad_norm": 0.338117316365242, "loss": 0.466731333732605, "time": 1.4722162008285522, "epoch": 236, "memory": 35394, "step": 73655}
{"lr": 0.0013167701861333057, "data_time": 0.09554111957550049, "grad_norm": 0.32030179500579836, "loss": 0.46985894739627837, "time": 1.5088475227355957, "epoch": 236, "memory": 35394, "step": 73755}
{"lr": 0.0013159839161911806, "data_time": 0.0035428524017333983, "grad_norm": 0.34295679330825807, "loss": 0.46374536454677584, "time": 1.4432478904724122, "epoch": 236, "memory": 35394, "step": 73855}
{"lr": 0.0013150944030045988, "data_time": 0.5523995161056519, "grad_norm": 0.3148926183581352, "loss": 0.4628940910100937, "time": 1.4867472887039184, "epoch": 237, "memory": 35394, "step": 73968}
{"lr": 0.0013143063147727837, "data_time": 0.40552291870117185, "grad_norm": 0.30061661899089814, "loss": 0.46664499640464785, "time": 1.4871501445770263, "epoch": 237, "memory": 35394, "step": 74068}
{"lr": 0.0013135173749252655, "data_time": 0.04180631637573242, "grad_norm": 0.33173630982637403, "loss": 0.4687069237232208, "time": 1.4744699239730834, "epoch": 237, "memory": 35394, "step": 74168}
{"lr": 0.0013126248496609825, "data_time": 0.40377805233001707, "grad_norm": 0.33536488860845565, "loss": 0.469461190700531, "time": 1.4663805484771728, "epoch": 238, "memory": 35394, "step": 74281}
{"lr": 0.0013118341002284941, "data_time": 0.17635560035705566, "grad_norm": 0.33947509080171584, "loss": 0.4653932422399521, "time": 1.499754500389099, "epoch": 238, "memory": 35394, "step": 74381}
{"lr": 0.0013110425032739252, "data_time": 0.0027176618576049806, "grad_norm": 0.38483567237854005, "loss": 0.46446220576763153, "time": 1.5704606771469116, "epoch": 238, "memory": 35394, "step": 74481}
{"lr": 0.0013101469804106718, "data_time": 0.31647043228149413, "grad_norm": 0.3238115444779396, "loss": 0.4654929846525192, "time": 1.5159422159194946, "epoch": 239, "memory": 35394, "step": 74594}
{"lr": 0.0013093535826052111, "data_time": 0.004011988639831543, "grad_norm": 0.3689264118671417, "loss": 0.4609528750181198, "time": 1.5569260835647583, "epoch": 239, "memory": 35394, "step": 74694}
{"lr": 0.0013085593413850424, "data_time": 0.003405904769897461, "grad_norm": 0.3187921330332756, "loss": 0.4687100172042847, "time": 1.4651612520217896, "epoch": 239, "memory": 35394, "step": 74794}
{"lr": 0.0013076608354501754, "data_time": 0.42070472240448, "grad_norm": 0.3052439823746681, "loss": 0.47043860554695127, "time": 1.5631870985031129, "epoch": 240, "memory": 35394, "step": 74907}
{"lr": 0.0013068648021424057, "data_time": 0.05683448314666748, "grad_norm": 0.3444280743598938, "loss": 0.46733553111553194, "time": 1.4865837574005127, "epoch": 240, "memory": 35394, "step": 75007}
{"lr": 0.001306067929540983, "data_time": 0.0037830114364624024, "grad_norm": 0.35478511154651643, "loss": 0.46225446462631226, "time": 1.5107024908065796, "epoch": 240, "memory": 35394, "step": 75107}
{"lr": 0.0013051664551102535, "data_time": 0.41273858547210696, "grad_norm": 0.32554986476898196, "loss": 0.47124233841896057, "time": 1.4848944425582886, "epoch": 241, "memory": 35394, "step": 75220}
{"lr": 0.0013043677992135932, "data_time": 0.15115962028503419, "grad_norm": 0.30056044161319734, "loss": 0.46781757175922395, "time": 1.5020212650299072, "epoch": 241, "memory": 35394, "step": 75320}
{"lr": 0.0013035683081579494, "data_time": 0.005869674682617188, "grad_norm": 0.34164765328168867, "loss": 0.4637881129980087, "time": 1.5848402738571168, "epoch": 241, "memory": 35394, "step": 75420}
{"lr": 0.0013026638798552623, "data_time": 0.6091554164886475, "grad_norm": 0.2905029639601707, "loss": 0.4613366395235062, "time": 1.539629578590393, "epoch": 242, "memory": 35394, "step": 75533}
{"lr": 0.0013018626143256717, "data_time": 0.23476922512054443, "grad_norm": 0.318492503464222, "loss": 0.4691872954368591, "time": 1.5508607149124145, "epoch": 242, "memory": 35394, "step": 75633}
{"lr": 0.0013010605177853165, "data_time": 0.143051815032959, "grad_norm": 0.351085239648819, "loss": 0.46646699607372283, "time": 1.4977303743362427, "epoch": 242, "memory": 35394, "step": 75733}
{"lr": 0.001300153150282495, "data_time": 0.3444175720214844, "grad_norm": 0.32250275909900666, "loss": 0.47066726684570315, "time": 1.5078081846237184, "epoch": 243, "memory": 35394, "step": 75846}
{"lr": 0.0012993492881182708, "data_time": 0.2600686073303223, "grad_norm": 0.3249635875225067, "loss": 0.46905105113983153, "time": 1.513573956489563, "epoch": 243, "memory": 35394, "step": 75946}
{"lr": 0.0012985445991049805, "data_time": 0.0036507368087768553, "grad_norm": 0.396460585296154, "loss": 0.4702253669500351, "time": 1.5148457288742065, "epoch": 243, "memory": 35394, "step": 76046}
{"lr": 0.0012976343071215296, "data_time": 0.4391202688217163, "grad_norm": 0.39237338304519653, "loss": 0.4621767789125443, "time": 1.545898985862732, "epoch": 244, "memory": 35394, "step": 76159}
{"lr": 0.001296827861363089, "data_time": 0.002884244918823242, "grad_norm": 0.29939399659633636, "loss": 0.4686032831668854, "time": 1.195367169380188, "epoch": 244, "memory": 35394, "step": 76259}
{"lr": 0.0012960205929306969, "data_time": 0.0024194240570068358, "grad_norm": 0.32455210089683534, "loss": 0.46469132602214813, "time": 1.5207327842712401, "epoch": 244, "memory": 35394, "step": 76359}
{"lr": 0.001295107391233562, "data_time": 0.45549888610839845, "grad_norm": 0.3120018973946571, "loss": 0.4662753134965897, "time": 1.5210197925567628, "epoch": 245, "memory": 35394, "step": 76472}
{"lr": 0.0012942983749632383, "data_time": 0.19333128929138182, "grad_norm": 0.3546252727508545, "loss": 0.46863662004470824, "time": 1.4430281162261962, "epoch": 245, "memory": 35394, "step": 76572}
{"lr": 0.0012934885402074194, "data_time": 0.0028992176055908205, "grad_norm": 0.31307715028524397, "loss": 0.464748740196228, "time": 1.5656473875045775, "epoch": 245, "memory": 35394, "step": 76672}
{"lr": 0.0012925724436107465, "data_time": 0.36916298866271974, "grad_norm": 0.2948297500610352, "loss": 0.4627065986394882, "time": 1.5231719493865967, "epoch": 246, "memory": 35394, "step": 76785}
{"lr": 0.0012917608699525678, "data_time": 0.002116703987121582, "grad_norm": 0.32975850999355316, "loss": 0.4714215248823166, "time": 1.5688661336898804, "epoch": 246, "memory": 35394, "step": 76885}
{"lr": 0.0012909484820106299, "data_time": 0.0029756307601928713, "grad_norm": 0.36044541001319885, "loss": 0.4663770139217377, "time": 1.5390424251556396, "epoch": 246, "memory": 35394, "step": 76985}
{"lr": 0.0012900295053755307, "data_time": 0.3253249406814575, "grad_norm": 0.30255201905965806, "loss": 0.4730460911989212, "time": 1.5364533424377442, "epoch": 247, "memory": 35394, "step": 77098}
{"lr": 0.0012892153874950137, "data_time": 0.06766343116760254, "grad_norm": 0.2998863384127617, "loss": 0.4691061645746231, "time": 1.5228681087493896, "epoch": 247, "memory": 35394, "step": 77198}
{"lr": 0.0012884004595456822, "data_time": 0.0023962736129760744, "grad_norm": 0.37679690420627593, "loss": 0.46624680161476134, "time": 1.5113505125045776, "epoch": 247, "memory": 35394, "step": 77298}
{"lr": 0.0012874786177799844, "data_time": 0.3771209239959717, "grad_norm": 0.3124998494982719, "loss": 0.46809901893138883, "time": 1.4644686937332154, "epoch": 248, "memory": 35394, "step": 77411}
{"lr": 0.001286661968883918, "data_time": 0.03960638046264649, "grad_norm": 0.3236915275454521, "loss": 0.4632264494895935, "time": 1.5134558200836181, "epoch": 248, "memory": 35394, "step": 77511}
{"lr": 0.0012858445141471228, "data_time": 0.0037645339965820313, "grad_norm": 0.31766601502895353, "loss": 0.46370447278022764, "time": 1.4462806463241578, "epoch": 248, "memory": 35394, "step": 77611}
{"lr": 0.0012849198222051343, "data_time": 0.2254669189453125, "grad_norm": 0.302718748152256, "loss": 0.4687885642051697, "time": 1.4973381519317628, "epoch": 249, "memory": 35394, "step": 77724}
{"lr": 0.0012841006555413669, "data_time": 0.0026499748229980467, "grad_norm": 0.3713096484541893, "loss": 0.46449984014034273, "time": 1.5938998937606812, "epoch": 249, "memory": 35394, "step": 77824}
{"lr": 0.00128328068727803, "data_time": 0.003267860412597656, "grad_norm": 0.3704144150018692, "loss": 0.4635948359966278, "time": 1.4978836297988891, "epoch": 249, "memory": 35394, "step": 77924}
{"lr": 0.0012823531601602967, "data_time": 0.516349196434021, "grad_norm": 0.27386185973882676, "loss": 0.46609307527542115, "time": 1.4919346570968628, "epoch": 250, "memory": 35394, "step": 78037}
{"lr": 0.0012815314890175184, "data_time": 0.295351243019104, "grad_norm": 0.32138467729091647, "loss": 0.469550234079361, "time": 1.532761549949646, "epoch": 250, "memory": 35394, "step": 78137}
{"lr": 0.0012807090205293314, "data_time": 0.004071807861328125, "grad_norm": 0.30280441641807554, "loss": 0.4698744654655457, "time": 1.4106571435928346, "epoch": 250, "memory": 35394, "step": 78237}
{"lr": 0.0012797786732823891, "data_time": 0.40531761646270753, "grad_norm": 0.3388435125350952, "loss": 0.46369911432266236, "time": 1.469050145149231, "epoch": 251, "memory": 35394, "step": 78350}
{"lr": 0.001278954510989921, "data_time": 0.1972501516342163, "grad_norm": 0.28827568888664246, "loss": 0.46401548087596894, "time": 1.5358712434768678, "epoch": 251, "memory": 35394, "step": 78450}
{"lr": 0.0012781295556191423, "data_time": 0.0029508352279663088, "grad_norm": 0.38253557235002517, "loss": 0.4637563943862915, "time": 1.5093847274780274, "epoch": 251, "memory": 35394, "step": 78550}
{"lr": 0.001277196403335278, "data_time": 0.42583503723144533, "grad_norm": 0.31694907546043394, "loss": 0.4622936338186264, "time": 1.5229913711547851, "epoch": 252, "memory": 35394, "step": 78663}
{"lr": 0.0012763697632628525, "data_time": 0.0031701803207397463, "grad_norm": 0.3875380516052246, "loss": 0.46701681017875674, "time": 1.5107484579086303, "epoch": 252, "memory": 35394, "step": 78763}
{"lr": 0.0012755423343920773, "data_time": 0.0032306909561157227, "grad_norm": 0.37837757766246793, "loss": 0.4656889885663986, "time": 1.589755129814148, "epoch": 252, "memory": 35394, "step": 78863}
{"lr": 0.0012746063922090822, "data_time": 0.3290176153182983, "grad_norm": 0.3420154988765717, "loss": 0.4669954180717468, "time": 1.3119521617889405, "epoch": 253, "memory": 35394, "step": 78976}
{"lr": 0.0012737772877666248, "data_time": 0.13404054641723634, "grad_norm": 0.31010129302740097, "loss": 0.46312508583068845, "time": 1.5081927537918092, "epoch": 253, "memory": 35394, "step": 79076}
{"lr": 0.001272947398818578, "data_time": 0.0042103052139282225, "grad_norm": 0.3066908285021782, "loss": 0.4658035457134247, "time": 1.466195273399353, "epoch": 253, "memory": 35394, "step": 79176}
{"lr": 0.001272008681919499, "data_time": 0.5530344247817993, "grad_norm": 0.32617090791463854, "loss": 0.46380678117275237, "time": 1.5789937734603883, "epoch": 254, "memory": 35394, "step": 79289}
{"lr": 0.0012711771265569156, "data_time": 0.30793545246124265, "grad_norm": 0.3680302366614342, "loss": 0.46476395428180695, "time": 1.5200053215026856, "epoch": 254, "memory": 35394, "step": 79389}
{"lr": 0.0012703447909942266, "data_time": 0.00396120548248291, "grad_norm": 0.2884763449430466, "loss": 0.468164524435997, "time": 1.5073227167129517, "epoch": 254, "memory": 35394, "step": 79489}
{"lr": 0.0012694033146071235, "data_time": 0.4193340063095093, "grad_norm": 0.343612465262413, "loss": 0.4592425525188446, "time": 1.5314547538757324, "epoch": 255, "memory": 35394, "step": 79602}
{"lr": 0.0012685693218140767, "data_time": 0.021576809883117675, "grad_norm": 0.3501312047243118, "loss": 0.4641679346561432, "time": 1.5809084892272949, "epoch": 255, "memory": 35394, "step": 79702}
{"lr": 0.0012677345531390662, "data_time": 0.003723001480102539, "grad_norm": 0.33026507645845415, "loss": 0.47488475143909453, "time": 1.6042247772216798, "epoch": 255, "memory": 35394, "step": 79802}
{"lr": 0.0012667903325367662, "data_time": 0.5657104253768921, "grad_norm": 0.36831189393997193, "loss": 0.46781024932861326, "time": 1.4952522754669189, "epoch": 256, "memory": 35394, "step": 79915}
{"lr": 0.0012659539158424607, "data_time": 0.21315624713897705, "grad_norm": 0.33322587311267854, "loss": 0.46920049786567686, "time": 1.4831294536590576, "epoch": 256, "memory": 35394, "step": 80015}
{"lr": 0.0012651167275969197, "data_time": 0.003029942512512207, "grad_norm": 0.3658388525247574, "loss": 0.4665148466825485, "time": 1.536159348487854, "epoch": 256, "memory": 35394, "step": 80115}
{"lr": 0.0012641697780967638, "data_time": 0.18049757480621337, "grad_norm": 0.41781883537769315, "loss": 0.47041677832603457, "time": 1.5663703680038452, "epoch": 257, "memory": 35394, "step": 80228}
{"lr": 0.001263330951069724, "data_time": 0.002169179916381836, "grad_norm": 0.3491339385509491, "loss": 0.4658224195241928, "time": 1.4696458339691163, "epoch": 257, "memory": 35394, "step": 80328}
{"lr": 0.0012624913568346937, "data_time": 0.0030460357666015625, "grad_norm": 0.347825887799263, "loss": 0.46384595334529877, "time": 1.4727134466171266, "epoch": 257, "memory": 35394, "step": 80428}
{"lr": 0.0012615416937982963, "data_time": 0.5104897499084473, "grad_norm": 0.3072036072611809, "loss": 0.4670787364244461, "time": 1.5709781646728516, "epoch": 258, "memory": 35394, "step": 80541}
{"lr": 0.001260700470046147, "data_time": 0.07982211112976074, "grad_norm": 0.3320091962814331, "loss": 0.46597607135772706, "time": 1.5225510835647582, "epoch": 258, "memory": 35394, "step": 80641}
{"lr": 0.0012598584834416988, "data_time": 0.0030711889266967773, "grad_norm": 0.3058071345090866, "loss": 0.4711665242910385, "time": 1.5480218410491944, "epoch": 258, "memory": 35394, "step": 80741}
{"lr": 0.0012589061222746873, "data_time": 0.5820668697357178, "grad_norm": 0.3016579419374466, "loss": 0.46747419238090515, "time": 1.5270774841308594, "epoch": 259, "memory": 35394, "step": 80854}
{"lr": 0.0012580625154439372, "data_time": 0.08549838066101074, "grad_norm": 0.35040887296199796, "loss": 0.4753994524478912, "time": 1.4340322971343995, "epoch": 259, "memory": 35394, "step": 80954}
{"lr": 0.0012572181501289512, "data_time": 0.004210209846496582, "grad_norm": 0.3353484123945236, "loss": 0.46854735910892487, "time": 1.5599142789840699, "epoch": 259, "memory": 35394, "step": 81054}
{"lr": 0.001256263106280727, "data_time": 0.3711008787155151, "grad_norm": 0.31395083069801333, "loss": 0.4599418967962265, "time": 1.5523782968521118, "epoch": 260, "memory": 35394, "step": 81167}
{"lr": 0.0012554171300565402, "data_time": 0.0022522449493408204, "grad_norm": 0.41849332898855207, "loss": 0.45896784365177157, "time": 1.5060030937194824, "epoch": 260, "memory": 35394, "step": 81267}
{"lr": 0.0012545703997284881, "data_time": 0.0027588605880737305, "grad_norm": 0.373277285695076, "loss": 0.45720756947994234, "time": 1.4762648344039917, "epoch": 260, "memory": 35394, "step": 81367}
{"lr": 0.00125361268869197, "data_time": 0.3349841833114624, "grad_norm": 0.3347054570913315, "loss": 0.4623743802309036, "time": 1.5142607927322387, "epoch": 261, "memory": 35394, "step": 81480}
{"lr": 0.0012527643567979484, "data_time": 0.026194167137145997, "grad_norm": 0.35263668894767763, "loss": 0.4587130308151245, "time": 1.514197325706482, "epoch": 261, "memory": 35394, "step": 81580}
{"lr": 0.0012519152751926648, "data_time": 0.0028854846954345704, "grad_norm": 0.29710915237665175, "loss": 0.4640374034643173, "time": 1.4915489435195923, "epoch": 261, "memory": 35394, "step": 81680}
{"lr": 0.0012509549125040416, "data_time": 0.506926441192627, "grad_norm": 0.3115774765610695, "loss": 0.4665748417377472, "time": 1.455892014503479, "epoch": 262, "memory": 35394, "step": 81793}
{"lr": 0.0012501042387020028, "data_time": 0.21654489040374755, "grad_norm": 0.3531580239534378, "loss": 0.4702944219112396, "time": 1.5267876148223878, "epoch": 262, "memory": 35394, "step": 81893}
{"lr": 0.0012492528195934645, "data_time": 0.0026345252990722656, "grad_norm": 0.38324601054191587, "loss": 0.46556368470191956, "time": 1.5125394821166993, "epoch": 262, "memory": 35394, "step": 81993}
{"lr": 0.0012482898208319385, "data_time": 0.14777612686157227, "grad_norm": 0.3208197608590126, "loss": 0.4611200660467148, "time": 1.5339725255966186, "epoch": 263, "memory": 35394, "step": 82106}
{"lr": 0.0012474368189216878, "data_time": 0.0020541906356811523, "grad_norm": 0.3591847062110901, "loss": 0.4612447261810303, "time": 1.528767991065979, "epoch": 263, "memory": 35394, "step": 82206}
{"lr": 0.0012465830761217943, "data_time": 0.00292353630065918, "grad_norm": 0.37636240422725675, "loss": 0.45819305181503295, "time": 1.4050469875335694, "epoch": 263, "memory": 35394, "step": 82306}
{"lr": 0.00124561745690933, "data_time": 0.26273365020751954, "grad_norm": 0.3387488692998886, "loss": 0.4587724983692169, "time": 1.5081710815429688, "epoch": 264, "memory": 35394, "step": 82419}
{"lr": 0.0012447621407284417, "data_time": 0.0019263744354248047, "grad_norm": 0.27965860813856125, "loss": 0.46387807130813596, "time": 1.5446446418762207, "epoch": 264, "memory": 35394, "step": 82519}
{"lr": 0.0012439060880867843, "data_time": 0.0032094478607177734, "grad_norm": 0.3719946801662445, "loss": 0.46506820917129515, "time": 1.5917941808700562, "epoch": 264, "memory": 35394, "step": 82619}
{"lr": 0.0012429378640878579, "data_time": 0.40904395580291747, "grad_norm": 0.35605699121952056, "loss": 0.47119680643081663, "time": 1.5347490072250367, "epoch": 265, "memory": 35394, "step": 82732}
{"lr": 0.001242080247511447, "data_time": 0.002452540397644043, "grad_norm": 0.349962417781353, "loss": 0.46526262164115906, "time": 1.4712541580200196, "epoch": 265, "memory": 35394, "step": 82832}
{"lr": 0.0012412218989150893, "data_time": 0.002834773063659668, "grad_norm": 0.2992583841085434, "loss": 0.4696843773126602, "time": 1.5389156579971313, "epoch": 265, "memory": 35394, "step": 82932}
{"lr": 0.00124025108583643, "data_time": 0.39367051124572755, "grad_norm": 0.36501734256744384, "loss": 0.4753405064344406, "time": 1.4776448011398315, "epoch": 266, "memory": 35394, "step": 83045}
{"lr": 0.001239391182776934, "data_time": 0.03767402172088623, "grad_norm": 0.315607026219368, "loss": 0.46515450477600095, "time": 1.5012179613113403, "epoch": 266, "memory": 35394, "step": 83145}
{"lr": 0.0012385305521501852, "data_time": 0.0027238130569458008, "grad_norm": 0.36847085058689116, "loss": 0.4638118028640747, "time": 1.5334633111953735, "epoch": 266, "memory": 35394, "step": 83245}
{"lr": 0.0012375571657405268, "data_time": 0.40223863124847414, "grad_norm": 0.32805008739233016, "loss": 0.46951937973499297, "time": 1.5382627725601197, "epoch": 267, "memory": 35394, "step": 83358}
{"lr": 0.0012366949901474692, "data_time": 0.0024191617965698244, "grad_norm": 0.30468040704727173, "loss": 0.46619117856025694, "time": 1.5378054857254029, "epoch": 267, "memory": 35394, "step": 83458}
{"lr": 0.0012358320914516597, "data_time": 0.003004789352416992, "grad_norm": 0.34270383417606354, "loss": 0.460872158408165, "time": 1.5088507890701295, "epoch": 267, "memory": 35394, "step": 83558}
{"lr": 0.001234856147501477, "data_time": 0.295900821685791, "grad_norm": 0.34570054709911346, "loss": 0.4666473150253296, "time": 1.431367039680481, "epoch": 268, "memory": 35394, "step": 83671}
{"lr": 0.0012339917133612508, "data_time": 0.06195929050445557, "grad_norm": 0.3907825738191605, "loss": 0.4703759402036667, "time": 1.4961795091629029, "epoch": 268, "memory": 35394, "step": 83771}
{"lr": 0.0012331265605944979, "data_time": 0.002770256996154785, "grad_norm": 0.3184709459543228, "loss": 0.46431903541088104, "time": 1.5439798355102539, "epoch": 268, "memory": 35394, "step": 83871}
{"lr": 0.0012321480749357572, "data_time": 0.44352943897247316, "grad_norm": 0.35427832007408144, "loss": 0.46455860137939453, "time": 1.50501389503479, "epoch": 269, "memory": 35394, "step": 83984}
{"lr": 0.0012312813962713895, "data_time": 0.10743896961212158, "grad_norm": 0.3189982146024704, "loss": 0.4596468061208725, "time": 1.530650782585144, "epoch": 269, "memory": 35394, "step": 84084}
{"lr": 0.0012304140034683862, "data_time": 0.003585219383239746, "grad_norm": 0.3320038452744484, "loss": 0.46487640738487246, "time": 1.5898433685302735, "epoch": 269, "memory": 35394, "step": 84184}
{"lr": 0.0012294329919742848, "data_time": 0.35251238346099856, "grad_norm": 0.3846312344074249, "loss": 0.46011226177215575, "time": 1.5209821939468384, "epoch": 270, "memory": 35394, "step": 84297}
{"lr": 0.0012285640828452184, "data_time": 0.0027587175369262694, "grad_norm": 0.38846727311611173, "loss": 0.4595049351453781, "time": 1.4998963356018067, "epoch": 270, "memory": 35394, "step": 84397}
{"lr": 0.0012276944640769885, "data_time": 0.003649330139160156, "grad_norm": 0.3162503868341446, "loss": 0.46310303509235384, "time": 1.494322156906128, "epoch": 270, "memory": 35394, "step": 84497}
{"lr": 0.0012267109426616985, "data_time": 0.3860471248626709, "grad_norm": 0.3277240559458733, "loss": 0.4721166968345642, "time": 1.5363848209381104, "epoch": 271, "memory": 35394, "step": 84610}
{"lr": 0.001225839817163558, "data_time": 0.11388020515441895, "grad_norm": 0.3593950718641281, "loss": 0.46417529284954073, "time": 1.5294118404388428, "epoch": 271, "memory": 35394, "step": 84710}
{"lr": 0.0012249679865372364, "data_time": 0.0029093742370605467, "grad_norm": 0.32325114458799364, "loss": 0.46242464780807496, "time": 1.5406227111816406, "epoch": 271, "memory": 35394, "step": 84810}
{"lr": 0.0012239819711556458, "data_time": 0.5042885303497314, "grad_norm": 0.333407586812973, "loss": 0.47032604217529295, "time": 1.5357633352279663, "epoch": 272, "memory": 35394, "step": 84923}
{"lr": 0.0012231086434200086, "data_time": 0.18458693027496337, "grad_norm": 0.37514606714248655, "loss": 0.47030189633369446, "time": 1.6284438133239747, "epoch": 272, "memory": 35394, "step": 85023}
{"lr": 0.0012222346150786159, "data_time": 0.003295230865478516, "grad_norm": 0.38026343286037445, "loss": 0.46199472844600675, "time": 1.5879291534423827, "epoch": 272, "memory": 35394, "step": 85123}
{"lr": 0.0012212461217260698, "data_time": 0.5181488275527955, "grad_norm": 0.3429690048098564, "loss": 0.47327795922756194, "time": 1.599101448059082, "epoch": 273, "memory": 35394, "step": 85236}
{"lr": 0.001220370605920241, "data_time": 0.0025919437408447265, "grad_norm": 0.3459371328353882, "loss": 0.46069936752319335, "time": 1.5707094192504882, "epoch": 273, "memory": 35394, "step": 85336}
{"lr": 0.0012194943940424426, "data_time": 0.0034863948822021484, "grad_norm": 0.337634938955307, "loss": 0.4702834665775299, "time": 1.5490986824035644, "epoch": 273, "memory": 35394, "step": 85436}
{"lr": 0.0012185034387544882, "data_time": 0.45970380306243896, "grad_norm": 0.3319919750094414, "loss": 0.4605804681777954, "time": 1.586451482772827, "epoch": 274, "memory": 35394, "step": 85549}
{"lr": 0.0012176257490812672, "data_time": 0.03954029083251953, "grad_norm": 0.3359376847743988, "loss": 0.46178382635116577, "time": 1.5715686321258544, "epoch": 274, "memory": 35394, "step": 85649}
{"lr": 0.001216747367881155, "data_time": 0.0029433012008666993, "grad_norm": 0.28774468302726747, "loss": 0.46424227952957153, "time": 1.4995102882385254, "epoch": 274, "memory": 35394, "step": 85749}
{"lr": 0.0012157539667332722, "data_time": 0.46995952129364016, "grad_norm": 0.3619896471500397, "loss": 0.4622582823038101, "time": 1.5467487573623657, "epoch": 275, "memory": 35394, "step": 85862}
{"lr": 0.0012148741174307237, "data_time": 0.14327285289764405, "grad_norm": 0.35565062165260314, "loss": 0.46028717756271365, "time": 1.5990921020507813, "epoch": 275, "memory": 35394, "step": 85962}
{"lr": 0.0012139935811575828, "data_time": 0.0029741525650024414, "grad_norm": 0.3156394898891449, "loss": 0.4663603127002716, "time": 1.585575556755066, "epoch": 275, "memory": 35394, "step": 86062}
{"lr": 0.0012129977502649302, "data_time": 0.46802520751953125, "grad_norm": 0.3152597099542618, "loss": 0.46627378165721894, "time": 1.553739356994629, "epoch": 276, "memory": 35394, "step": 86175}
{"lr": 0.0012121157556061507, "data_time": 0.002151751518249512, "grad_norm": 0.397307425737381, "loss": 0.46512063443660734, "time": 1.5695884943008422, "epoch": 276, "memory": 35394, "step": 86275}
{"lr": 0.0012112330785442199, "data_time": 0.003231716156005859, "grad_norm": 0.36777080595493317, "loss": 0.46842253804206846, "time": 1.5396397590637207, "epoch": 276, "memory": 35394, "step": 86375}
{"lr": 0.0012102348340613752, "data_time": 0.023497295379638673, "grad_norm": 0.34312145709991454, "loss": 0.466575825214386, "time": 1.5641521215438843, "epoch": 277, "memory": 35394, "step": 86488}
{"lr": 0.0012093507083542642, "data_time": 0.001926279067993164, "grad_norm": 0.3099060237407684, "loss": 0.46802146136760714, "time": 1.5274041652679444, "epoch": 277, "memory": 35394, "step": 86588}
{"lr": 0.001208465904822513, "data_time": 0.002985572814941406, "grad_norm": 0.3414572969079018, "loss": 0.4632605969905853, "time": 1.5077600002288818, "epoch": 277, "memory": 35394, "step": 86688}
{"lr": 0.0012074652629432037, "data_time": 0.4642640113830566, "grad_norm": 0.3766774147748947, "loss": 0.4645500808954239, "time": 1.5441750049591065, "epoch": 278, "memory": 35394, "step": 86801}
{"lr": 0.0012065790205302326, "data_time": 0.23091371059417726, "grad_norm": 0.33131963312625884, "loss": 0.46733894348144533, "time": 1.5386995077133179, "epoch": 278, "memory": 35394, "step": 86901}
{"lr": 0.0012056921048821307, "data_time": 0.04521269798278808, "grad_norm": 0.3346737951040268, "loss": 0.460465082526207, "time": 1.6833032608032226, "epoch": 278, "memory": 35394, "step": 87001}
{"lr": 0.0012046890818389788, "data_time": 0.3774148464202881, "grad_norm": 0.3846255898475647, "loss": 0.4682097017765045, "time": 1.5149388790130616, "epoch": 279, "memory": 35394, "step": 87114}
{"lr": 0.0012038007370969558, "data_time": 0.0022800683975219725, "grad_norm": 0.376416939496994, "loss": 0.45581862032413484, "time": 1.5742691278457641, "epoch": 279, "memory": 35394, "step": 87214}
{"lr": 0.0012029117237202307, "data_time": 0.003077244758605957, "grad_norm": 0.30848177075386046, "loss": 0.4632583290338516, "time": 1.5687822341918944, "epoch": 279, "memory": 35394, "step": 87314}
{"lr": 0.0012019063357844822, "data_time": 0.538260531425476, "grad_norm": 0.3709270298480988, "loss": 0.4631988435983658, "time": 1.5495817422866822, "epoch": 280, "memory": 35394, "step": 87427}
{"lr": 0.0012010159031243211, "data_time": 0.3526583194732666, "grad_norm": 0.40487004816532135, "loss": 0.47429869771003724, "time": 1.63309588432312, "epoch": 280, "memory": 35394, "step": 87527}
{"lr": 0.0012001248064407333, "data_time": 0.004546308517456054, "grad_norm": Infinity, "loss": 0.46113031506538393, "time": 1.5478166580200194, "epoch": 280, "memory": 35394, "step": 87627}
{"lr": 0.001199117069922003, "data_time": 0.49658291339874266, "grad_norm": 0.31126977503299713, "loss": 0.4678483933210373, "time": 1.4468307971954346, "epoch": 281, "memory": 35394, "step": 87740}
{"lr": 0.001198224563788486, "data_time": 0.22725331783294678, "grad_norm": 0.37149758338928224, "loss": 0.4633202165365219, "time": 1.4922884225845336, "epoch": 281, "memory": 35394, "step": 87840}
{"lr": 0.0011973313982535916, "data_time": 0.0026759862899780273, "grad_norm": 0.36448719799518586, "loss": 0.4633750647306442, "time": 1.5599793672561646, "epoch": 281, "memory": 35394, "step": 87940}
{"lr": 0.0011963213294995875, "data_time": 0.0023265838623046874, "grad_norm": 0.3210224986076355, "loss": 0.4680168777704239, "time": 1.5934988498687743, "epoch": 282, "memory": 35394, "step": 88053}
{"lr": 0.0011954267643711355, "data_time": 0.0024870157241821287, "grad_norm": 0.38854133784770967, "loss": 0.46702973544597626, "time": 1.554498815536499, "epoch": 282, "memory": 35394, "step": 88153}
{"lr": 0.0011945315444740513, "data_time": 0.0031830310821533204, "grad_norm": 0.4659465879201889, "loss": 0.465714755654335, "time": 1.679362416267395, "epoch": 282, "memory": 35394, "step": 88253}
{"lr": 0.0011935191598703204, "data_time": 0.31987454891204836, "grad_norm": 0.37930222153663634, "loss": 0.46836510896682737, "time": 1.6049696207046509, "epoch": 283, "memory": 35394, "step": 88366}
{"lr": 0.001192622550258755, "data_time": 0.0021160602569580077, "grad_norm": 0.33068291693925855, "loss": 0.46855681836605073, "time": 1.5970706701278687, "epoch": 283, "memory": 35394, "step": 88466}
{"lr": 0.0011917252905219219, "data_time": 0.003409886360168457, "grad_norm": 0.35214547216892245, "loss": 0.46543982326984407, "time": 1.6046154975891114, "epoch": 283, "memory": 35394, "step": 88566}
{"lr": 0.0011907106064915761, "data_time": 0.6089071273803711, "grad_norm": 0.33913220912218095, "loss": 0.4698364794254303, "time": 1.5496392726898194, "epoch": 284, "memory": 35394, "step": 88679}
{"lr": 0.0011898119669418832, "data_time": 0.39961073398590086, "grad_norm": 0.3672237128019333, "loss": 0.4728143781423569, "time": 1.6662665128707885, "epoch": 284, "memory": 35394, "step": 88779}
{"lr": 0.0011889126819208365, "data_time": 0.0032920122146606447, "grad_norm": 0.3887747526168823, "loss": 0.46705553829669955, "time": 1.5979070663452148, "epoch": 284, "memory": 35394, "step": 88879}
{"lr": 0.0011878957149242912, "data_time": 0.6087878227233887, "grad_norm": 0.3611311405897141, "loss": 0.4600668400526047, "time": 1.6484699726104737, "epoch": 285, "memory": 35394, "step": 88992}
{"lr": 0.0011869950600143886, "data_time": 0.3694000244140625, "grad_norm": 0.33576592355966567, "loss": 0.4621111124753952, "time": 1.57984356880188, "epoch": 285, "memory": 35394, "step": 89092}
{"lr": 0.0011860937642975164, "data_time": 0.004526567459106445, "grad_norm": 0.3474994242191315, "loss": 0.464627680182457, "time": 1.6636714696884156, "epoch": 285, "memory": 35394, "step": 89192}
{"lr": 0.0011850745308322206, "data_time": 0.39878480434417723, "grad_norm": 0.38627201318740845, "loss": 0.4668616861104965, "time": 1.560717225074768, "epoch": 286, "memory": 35394, "step": 89305}
{"lr": 0.0011841718751727187, "data_time": 0.002228569984436035, "grad_norm": 0.3575836569070816, "loss": 0.4669040530920029, "time": 1.5687651872634887, "epoch": 286, "memory": 35394, "step": 89405}
{"lr": 0.0011832685833810263, "data_time": 0.0026723146438598633, "grad_norm": 0.37213812172412875, "loss": 0.46183042228221893, "time": 1.5764289379119873, "epoch": 286, "memory": 35394, "step": 89505}
{"lr": 0.0011822470999811974, "data_time": 0.4815340042114258, "grad_norm": 0.35899750590324403, "loss": 0.460393151640892, "time": 1.6401802062988282, "epoch": 287, "memory": 35394, "step": 89618}
{"lr": 0.0011813424582151624, "data_time": 0.014884424209594727, "grad_norm": 0.32994766533374786, "loss": 0.46829311847686766, "time": 1.5523320198059083, "epoch": 287, "memory": 35394, "step": 89718}
{"lr": 0.0011804371850020368, "data_time": 0.0031268119812011717, "grad_norm": 0.34396361112594603, "loss": 0.465838760137558, "time": 1.6441633462905885, "epoch": 287, "memory": 35394, "step": 89818}
{"lr": 0.0011794134682383904, "data_time": 0.44485249519348147, "grad_norm": 0.3681080386042595, "loss": 0.4643924117088318, "time": 1.6353218317031861, "epoch": 288, "memory": 35394, "step": 89931}
{"lr": 0.0011785068550411104, "data_time": 0.14226632118225097, "grad_norm": 0.3565189868211746, "loss": 0.46496706902980806, "time": 1.6225261688232422, "epoch": 288, "memory": 35394, "step": 90031}
{"lr": 0.0011775996150920824, "data_time": 0.0027550697326660157, "grad_norm": 0.3769795596599579, "loss": 0.4657506585121155, "time": 1.6009649276733398, "epoch": 288, "memory": 35394, "step": 90131}
{"lr": 0.0011765736815715641, "data_time": 0.6191456079483032, "grad_norm": 0.3425858527421951, "loss": 0.46896255016326904, "time": 1.5387473583221436, "epoch": 289, "memory": 35394, "step": 90244}
{"lr": 0.0011756651116503066, "data_time": 0.2670077085494995, "grad_norm": 0.39491141140460967, "loss": 0.4646580755710602, "time": 1.6411162853240966, "epoch": 289, "memory": 35394, "step": 90344}
{"lr": 0.0011747559196828106, "data_time": 0.002831697463989258, "grad_norm": 0.39591694474220274, "loss": 0.46739926338195803, "time": 1.6391223430633546, "epoch": 289, "memory": 35394, "step": 90444}
{"lr": 0.0011737277860483259, "data_time": 0.6744180917739868, "grad_norm": 0.3295677751302719, "loss": 0.4645369291305542, "time": 1.59265079498291, "epoch": 290, "memory": 35394, "step": 90557}
{"lr": 0.001172817274142101, "data_time": 0.35424206256866453, "grad_norm": 0.34442706406116486, "loss": 0.4650325894355774, "time": 1.5509178161621093, "epoch": 290, "memory": 35394, "step": 90657}
{"lr": 0.001171906144905237, "data_time": 0.09068679809570312, "grad_norm": 0.3749510258436203, "loss": 0.46779442131519317, "time": 1.614454174041748, "epoch": 290, "memory": 35394, "step": 90757}
{"lr": 0.001170875827835386, "data_time": 0.5342430114746094, "grad_norm": 0.441801455616951, "loss": 0.4633727103471756, "time": 1.6441303491592407, "epoch": 291, "memory": 35394, "step": 90870}
{"lr": 0.001169963388714706, "data_time": 0.40367305278778076, "grad_norm": 0.36408682763576505, "loss": 0.46494604647159576, "time": 1.6246465682983398, "epoch": 291, "memory": 35394, "step": 90970}
{"lr": 0.0011690503369890034, "data_time": 0.0026729345321655274, "grad_norm": 0.37570794522762296, "loss": 0.4666686475276947, "time": 1.6542626857757567, "epoch": 291, "memory": 35394, "step": 91070}
{"lr": 0.0011680178531977994, "data_time": 0.4554134845733643, "grad_norm": 0.3399913102388382, "loss": 0.46394857466220857, "time": 1.585689115524292, "epoch": 292, "memory": 35394, "step": 91183}
{"lr": 0.001167103501664442, "data_time": 0.0020641088485717773, "grad_norm": 0.3417989447712898, "loss": 0.46301422119140623, "time": 1.617223310470581, "epoch": 292, "memory": 35394, "step": 91283}
{"lr": 0.0011661885422616135, "data_time": 0.002950906753540039, "grad_norm": 0.38059247732162477, "loss": 0.46475826799869535, "time": 1.5846046924591064, "epoch": 292, "memory": 35394, "step": 91383}
{"lr": 0.001165153908498226, "data_time": 0.30475726127624514, "grad_norm": 0.38393640220165254, "loss": 0.4657118827104568, "time": 1.5977607727050782, "epoch": 293, "memory": 35394, "step": 91496}
{"lr": 0.0011642376593849915, "data_time": 0.0023902416229248046, "grad_norm": 0.32545401304960253, "loss": 0.46666126549243925, "time": 1.6187320470809936, "epoch": 293, "memory": 35394, "step": 91596}
{"lr": 0.0011633208071476996, "data_time": 0.0027580022811889648, "grad_norm": 0.34029747247695924, "loss": 0.4655200183391571, "time": 1.5613500356674195, "epoch": 293, "memory": 35394, "step": 91696}
{"lr": 0.0011622840401961715, "data_time": 0.5954462051391601, "grad_norm": 0.33289153277873995, "loss": 0.46241928040981295, "time": 1.6470914363861084, "epoch": 294, "memory": 35394, "step": 91809}
{"lr": 0.0011613659083666412, "data_time": 0.26839823722839357, "grad_norm": 0.37913145422935485, "loss": 0.4622330337762833, "time": 1.6131415367126465, "epoch": 294, "memory": 35394, "step": 91909}
{"lr": 0.0011604471781682516, "data_time": 0.0031727313995361327, "grad_norm": 0.35454994440078735, "loss": 0.45683835446834564, "time": 1.5900266885757446, "epoch": 294, "memory": 35394, "step": 92009}
{"lr": 0.001159408294847231, "data_time": 0.648142957687378, "grad_norm": 0.3824733108282089, "loss": 0.4612177759408951, "time": 1.5738375663757325, "epoch": 295, "memory": 35394, "step": 92122}
{"lr": 0.001158488295195531, "data_time": 0.29166743755340574, "grad_norm": 0.3968316465616226, "loss": 0.46058167815208434, "time": 1.6405713081359863, "epoch": 295, "memory": 35394, "step": 92222}
{"lr": 0.0011575677019398777, "data_time": 0.00257725715637207, "grad_norm": 0.3505641341209412, "loss": 0.46585251986980436, "time": 1.7541587591171264, "epoch": 295, "memory": 35394, "step": 92322}
{"lr": 0.0011565267191023453, "data_time": 0.5309962511062623, "grad_norm": 0.35520039647817614, "loss": 0.4635207712650299, "time": 1.5784997463226318, "epoch": 296, "memory": 35394, "step": 92435}
{"lr": 0.0011556048665529019, "data_time": 0.16712064743041993, "grad_norm": 0.34544832706451417, "loss": 0.46851237416267394, "time": 1.5794775485992432, "epoch": 296, "memory": 35394, "step": 92535}
{"lr": 0.0011546824251740379, "data_time": 0.002832937240600586, "grad_norm": 0.37740744948387145, "loss": 0.4725720763206482, "time": 1.5979555368423461, "epoch": 296, "memory": 35394, "step": 92635}
{"lr": 0.0011536393597070334, "data_time": 0.5984900951385498, "grad_norm": 0.3172915160655975, "loss": 0.4677847266197205, "time": 1.6205337285995483, "epoch": 297, "memory": 35394, "step": 92748}
{"lr": 0.0011527156692143301, "data_time": 0.33566133975982665, "grad_norm": 0.33838420212268827, "loss": 0.4616143316030502, "time": 1.6138256072998047, "epoch": 297, "memory": 35394, "step": 92848}
{"lr": 0.0011517913946762922, "data_time": 0.0033449888229370116, "grad_norm": 0.3881963163614273, "loss": 0.4672669142484665, "time": 1.5705973863601685, "epoch": 297, "memory": 35394, "step": 92948}
{"lr": 0.0011507462635006382, "data_time": 0.606036114692688, "grad_norm": 0.41233448386192323, "loss": 0.46928854286670685, "time": 1.6011975526809692, "epoch": 298, "memory": 35394, "step": 93061}
{"lr": 0.0011498207500489717, "data_time": 0.21345171928405762, "grad_norm": 0.3668887048959732, "loss": 0.4675620675086975, "time": 1.5872506856918336, "epoch": 298, "memory": 35394, "step": 93161}
{"lr": 0.001148894657345533, "data_time": 0.002958822250366211, "grad_norm": 0.3531944125890732, "loss": 0.4711595982313156, "time": 1.6023502588272094, "epoch": 298, "memory": 35394, "step": 93261}
{"lr": 0.0011478474774155682, "data_time": 0.595347261428833, "grad_norm": 0.44405422210693357, "loss": 0.4707094728946686, "time": 1.5911672115325928, "epoch": 299, "memory": 35394, "step": 93374}
{"lr": 0.0011469201560188103, "data_time": 0.10502481460571289, "grad_norm": 0.381079238653183, "loss": 0.46493751406669614, "time": 1.6430301427841187, "epoch": 299, "memory": 35394, "step": 93474}
{"lr": 0.0011459922601732384, "data_time": 0.002877330780029297, "grad_norm": 0.34249959886074066, "loss": 0.47053423523902893, "time": 1.6074874639511108, "epoch": 299, "memory": 35394, "step": 93574}
{"lr": 0.0011449430484765325, "data_time": 0.5910256385803223, "grad_norm": 0.35524071753025055, "loss": 0.46881082057952883, "time": 1.5997172117233276, "epoch": 300, "memory": 35394, "step": 93687}
{"lr": 0.0011440139341778852, "data_time": 0.11226377487182618, "grad_norm": 0.333830800652504, "loss": 0.46107131242752075, "time": 1.5733336925506591, "epoch": 300, "memory": 35394, "step": 93787}
{"lr": 0.0011430842502426987, "data_time": 0.0036130905151367187, "grad_norm": 0.39849817752838135, "loss": 0.4616857886314392, "time": 1.585910439491272, "epoch": 300, "memory": 35394, "step": 93887}
{"lr": 0.0011420330237997785, "data_time": 0.44316878318786623, "grad_norm": 0.37567560076713563, "loss": 0.4569307744503021, "time": 1.606713843345642, "epoch": 301, "memory": 35394, "step": 94000}
{"lr": 0.0011411021316715256, "data_time": 0.13624961376190187, "grad_norm": 0.38967066407203677, "loss": 0.46667027473449707, "time": 1.564359474182129, "epoch": 301, "memory": 35394, "step": 94100}
{"lr": 0.0011401706747282527, "data_time": 0.00363306999206543, "grad_norm": 0.3642305225133896, "loss": 0.4683549374341965, "time": 1.6779764413833618, "epoch": 301, "memory": 35394, "step": 94200}
{"lr": 0.0011391174505923347, "data_time": 0.6072432518005371, "grad_norm": 0.3350054442882538, "loss": 0.46487840414047243, "time": 1.6339091300964355, "epoch": 302, "memory": 35394, "step": 94313}
{"lr": 0.0011381847957356022, "data_time": 0.2090998411178589, "grad_norm": 0.3540161222219467, "loss": 0.46816551089286806, "time": 1.6446447849273682, "epoch": 302, "memory": 35394, "step": 94413}
{"lr": 0.0011372515808945273, "data_time": 0.0031813621520996095, "grad_norm": 0.29383237808942797, "loss": 0.46113050580024717, "time": 1.591700005531311, "epoch": 302, "memory": 35394, "step": 94513}
{"lr": 0.0011361963761512292, "data_time": 0.4952124834060669, "grad_norm": 0.3676750659942627, "loss": 0.46621440052986146, "time": 1.6334352016448974, "epoch": 303, "memory": 35394, "step": 94626}
{"lr": 0.0011352619736957354, "data_time": 0.13430461883544922, "grad_norm": 0.3157822072505951, "loss": 0.46456317901611327, "time": 1.650476861000061, "epoch": 303, "memory": 35394, "step": 94726}
{"lr": 0.0011343270160956642, "data_time": 0.004764580726623535, "grad_norm": 0.3498678386211395, "loss": 0.46500890552997587, "time": 1.6553795099258424, "epoch": 303, "memory": 35394, "step": 94826}
{"lr": 0.00113326984786274, "data_time": 0.5900580883026123, "grad_norm": 0.379319629073143, "loss": 0.4640802353620529, "time": 1.6651525259017945, "epoch": 304, "memory": 35394, "step": 94939}
{"lr": 0.0011323337129665594, "data_time": 0.29657959938049316, "grad_norm": 0.3843678668141365, "loss": 0.46149422526359557, "time": 1.6401970863342286, "epoch": 304, "memory": 35394, "step": 95039}
{"lr": 0.001131397027774569, "data_time": 0.003120613098144531, "grad_norm": 0.35962260961532594, "loss": 0.4603602409362793, "time": 1.6639514684677124, "epoch": 304, "memory": 35394, "step": 95139}
{"lr": 0.0011303379132016184, "data_time": 0.43938984870910647, "grad_norm": 0.40504640340805054, "loss": 0.461254808306694, "time": 1.6766899347305297, "epoch": 305, "memory": 35394, "step": 95252}
{"lr": 0.0011294000610509217, "data_time": 0.0026125431060791014, "grad_norm": 0.37473517954349517, "loss": 0.46524520218372345, "time": 1.6454989433288574, "epoch": 305, "memory": 35394, "step": 95352}
{"lr": 0.0011284616634621141, "data_time": 0.0032041072845458984, "grad_norm": 0.32711775600910187, "loss": 0.46352098882198334, "time": 1.6450801134109496, "epoch": 305, "memory": 35394, "step": 95452}
{"lr": 0.0011274006197303137, "data_time": 0.6045400142669678, "grad_norm": 0.35700735449790955, "loss": 0.4683313429355621, "time": 1.5939388990402221, "epoch": 306, "memory": 35394, "step": 95565}
{"lr": 0.0011264610655391367, "data_time": 0.3181952953338623, "grad_norm": 0.38011226803064346, "loss": 0.4732223302125931, "time": 1.6278181076049805, "epoch": 306, "memory": 35394, "step": 95665}
{"lr": 0.0011255209707763903, "data_time": 0.0029214859008789063, "grad_norm": 0.3505801796913147, "loss": 0.46690449118614197, "time": 1.4929169416427612, "epoch": 306, "memory": 35394, "step": 95765}
{"lr": 0.0011244580150982116, "data_time": 0.4215764284133911, "grad_norm": 0.3815839231014252, "loss": 0.4644051790237427, "time": 1.4896108150482177, "epoch": 307, "memory": 35394, "step": 95878}
{"lr": 0.0011235167741081933, "data_time": 0.110443377494812, "grad_norm": 0.33444644063711165, "loss": 0.46479482352733614, "time": 1.6105295658111571, "epoch": 307, "memory": 35394, "step": 95978}
{"lr": 0.0011225749974219234, "data_time": 0.0031298160552978515, "grad_norm": 0.3915285557508469, "loss": 0.4700177401304245, "time": 1.7660296440124512, "epoch": 307, "memory": 35394, "step": 96078}
{"lr": 0.0011215101470408537, "data_time": 0.4840705871582031, "grad_norm": 0.346040341258049, "loss": 0.46649112105369567, "time": 1.5245156764984131, "epoch": 308, "memory": 35394, "step": 96191}
{"lr": 0.0011205672345210025, "data_time": 0.20998787879943848, "grad_norm": 0.3984266996383667, "loss": 0.466405576467514, "time": 1.5798229932785035, "epoch": 308, "memory": 35394, "step": 96291}
{"lr": 0.0011196237911889014, "data_time": 0.2346816062927246, "grad_norm": 0.30659135729074477, "loss": 0.47136775851249696, "time": 1.5717552185058594, "epoch": 308, "memory": 35394, "step": 96391}
{"lr": 0.001118557063379167, "data_time": 0.39914371967315676, "grad_norm": 0.38200839459896085, "loss": 0.4682675898075104, "time": 1.6032427310943604, "epoch": 309, "memory": 35394, "step": 96504}
{"lr": 0.0011176124946256079, "data_time": 0.12008843421936036, "grad_norm": 0.40252560675144194, "loss": 0.470858097076416, "time": 1.6116538286209106, "epoch": 309, "memory": 35394, "step": 96604}
{"lr": 0.0011166673999524143, "data_time": 0.0028982162475585938, "grad_norm": 0.3823351114988327, "loss": 0.46400571763515475, "time": 1.5818591356277465, "epoch": 309, "memory": 35394, "step": 96704}
{"lr": 0.001115598812018695, "data_time": 0.2540755033493042, "grad_norm": 0.33831357806921003, "loss": 0.46479673087596896, "time": 1.71281099319458, "epoch": 310, "memory": 35394, "step": 96817}
{"lr": 0.0011146526023544157, "data_time": 0.0027942895889282227, "grad_norm": 0.32838728129863737, "loss": 0.46251570582389834, "time": 1.591313409805298, "epoch": 310, "memory": 35394, "step": 96917}
{"lr": 0.0011137058716716506, "data_time": 0.003394031524658203, "grad_norm": 0.3490890100598335, "loss": 0.4706921368837357, "time": 1.713346791267395, "epoch": 310, "memory": 35394, "step": 97017}
{"lr": 0.0011126354409487988, "data_time": 0.39335970878601073, "grad_norm": 0.37133447229862215, "loss": 0.4621105432510376, "time": 1.5637678861618043, "epoch": 311, "memory": 35394, "step": 97130}
{"lr": 0.0011116876057234084, "data_time": 0.10347709655761719, "grad_norm": 0.37880765199661254, "loss": 0.4639422595500946, "time": 1.598440957069397, "epoch": 311, "memory": 35394, "step": 97230}
{"lr": 0.001110739254389131, "data_time": 0.0028684139251708984, "grad_norm": 0.32672569155693054, "loss": 0.4629657417535782, "time": 1.55874924659729, "epoch": 311, "memory": 35394, "step": 97330}
{"lr": 0.0011096669982418985, "data_time": 0.6588788509368897, "grad_norm": 0.4154163062572479, "loss": 0.47298618257045744, "time": 1.610899567604065, "epoch": 312, "memory": 35394, "step": 97443}
{"lr": 0.001108717552831378, "data_time": 0.21188883781433104, "grad_norm": 0.3578603446483612, "loss": 0.4669059604406357, "time": 1.6026755571365356, "epoch": 312, "memory": 35394, "step": 97543}
{"lr": 0.0011077675962299423, "data_time": 0.004004478454589844, "grad_norm": 0.33907792121171954, "loss": 0.46377475559711456, "time": 1.5762159824371338, "epoch": 312, "memory": 35394, "step": 97643}
{"lr": 0.0011066935320526888, "data_time": 0.6499399423599244, "grad_norm": 0.4293289840221405, "loss": 0.4620305985212326, "time": 1.615049695968628, "epoch": 313, "memory": 35394, "step": 97756}
{"lr": 0.0011057424918591383, "data_time": 0.22910313606262206, "grad_norm": 0.37025160193443296, "loss": 0.464805606007576, "time": 1.6034706354141235, "epoch": 313, "memory": 35394, "step": 97856}
{"lr": 0.0011047909454009362, "data_time": 0.003586077690124512, "grad_norm": 0.37105808556079867, "loss": 0.4649876683950424, "time": 1.5901908874511719, "epoch": 313, "memory": 35394, "step": 97956}
{"lr": 0.001103715090617356, "data_time": 0.6585545778274536, "grad_norm": 0.3750958889722824, "loss": 0.46179133653640747, "time": 1.596438503265381, "epoch": 314, "memory": 35394, "step": 98069}
{"lr": 0.001102762471068746, "data_time": 0.3602017641067505, "grad_norm": 0.3890730917453766, "loss": 0.4656057506799698, "time": 1.6135340929031372, "epoch": 314, "memory": 35394, "step": 98169}
{"lr": 0.001101809350189963, "data_time": 0.06701123714447021, "grad_norm": 0.3960627496242523, "loss": 0.46303985714912416, "time": 1.5760614395141601, "epoch": 314, "memory": 35394, "step": 98269}
{"lr": 0.0011007317222527914, "data_time": 0.47241802215576173, "grad_norm": 0.3592590719461441, "loss": 0.46603297591209414, "time": 1.5634577989578247, "epoch": 315, "memory": 35394, "step": 98382}
{"lr": 0.001099777538802716, "data_time": 0.2837002038955688, "grad_norm": 0.34163897931575776, "loss": 0.4665779650211334, "time": 1.5611371994018555, "epoch": 315, "memory": 35394, "step": 98482}
{"lr": 0.0010988228589650737, "data_time": 0.004003167152404785, "grad_norm": 0.3145834982395172, "loss": 0.46153536140918733, "time": 1.630043339729309, "epoch": 315, "memory": 35394, "step": 98582}
{"lr": 0.0010977434753558187, "data_time": 0.5657073259353638, "grad_norm": 0.37737737894058226, "loss": 0.4669494152069092, "time": 1.5646659851074218, "epoch": 316, "memory": 35394, "step": 98695}
{"lr": 0.0010967877434832386, "data_time": 0.194036865234375, "grad_norm": 0.4161744862794876, "loss": 0.46846421957015993, "time": 1.5723948001861572, "epoch": 316, "memory": 35394, "step": 98795}
{"lr": 0.0010958315201737552, "data_time": 0.0035933732986450197, "grad_norm": 0.43328510522842406, "loss": 0.4664421766996384, "time": 1.545757031440735, "epoch": 316, "memory": 35394, "step": 98895}
{"lr": 0.0010947503984024012, "data_time": 0.4624741315841675, "grad_norm": 0.4194188952445984, "loss": 0.46344874799251556, "time": 1.6122533559799195, "epoch": 317, "memory": 35394, "step": 99008}
{"lr": 0.0010937931336113967, "data_time": 0.15626492500305175, "grad_norm": 0.37449073791503906, "loss": 0.46662144362926483, "time": 1.5781793355941773, "epoch": 317, "memory": 35394, "step": 99108}
{"lr": 0.0010928353823421231, "data_time": 0.0032120466232299803, "grad_norm": 0.38456166684627535, "loss": 0.46186742186546326, "time": 1.5687747478485108, "epoch": 317, "memory": 35394, "step": 99208}
{"lr": 0.0010917525399468503, "data_time": 0.31862540245056153, "grad_norm": 0.3665389806032181, "loss": 0.4593781292438507, "time": 1.430210828781128, "epoch": 318, "memory": 35394, "step": 99321}
{"lr": 0.00109079375776637, "data_time": 0.11305255889892578, "grad_norm": 0.3560570627450943, "loss": 0.4664315521717072, "time": 1.610615348815918, "epoch": 318, "memory": 35394, "step": 99421}
{"lr": 0.0010898344940741462, "data_time": 0.0033539533615112305, "grad_norm": 0.4189566820859909, "loss": 0.4602026343345642, "time": 1.598944354057312, "epoch": 318, "memory": 35394, "step": 99521}
{"lr": 0.001088749948621051, "data_time": 0.4387184143066406, "grad_norm": 0.3948253720998764, "loss": 0.46758320927619934, "time": 1.5718290090560914, "epoch": 319, "memory": 35394, "step": 99634}
{"lr": 0.0010877896646046608, "data_time": 0.2479219913482666, "grad_norm": 0.43967366218566895, "loss": 0.4679734617471695, "time": 1.6480932235717773, "epoch": 319, "memory": 35394, "step": 99734}
{"lr": 0.001086828904050861, "data_time": 0.0028477907180786133, "grad_norm": 0.4127994656562805, "loss": 0.4662721395492554, "time": 1.6055541276931762, "epoch": 319, "memory": 35394, "step": 99834}
{"lr": 0.0010857426731336632, "data_time": 0.5955863237380982, "grad_norm": 0.3813270300626755, "loss": 0.4649563908576965, "time": 1.5734501838684083, "epoch": 320, "memory": 35394, "step": 99947}
{"lr": 0.0010847809028592858, "data_time": 0.1728292226791382, "grad_norm": 0.43420071005821226, "loss": 0.453784304857254, "time": 1.5845251560211182, "epoch": 320, "memory": 35394, "step": 100047}
{"lr": 0.0010838186610295695, "data_time": 0.0033464908599853517, "grad_norm": 0.4163419961929321, "loss": 0.46945986747741697, "time": 1.5581450462341309, "epoch": 320, "memory": 35394, "step": 100147}
{"lr": 0.0010827307622693328, "data_time": 0.549060606956482, "grad_norm": 0.3746534615755081, "loss": 0.469191899895668, "time": 1.565065288543701, "epoch": 321, "memory": 35394, "step": 100260}
{"lr": 0.0010817675213390064, "data_time": 0.08828883171081543, "grad_norm": 0.4188405603170395, "loss": 0.46526154279708865, "time": 1.62258780002594, "epoch": 321, "memory": 35394, "step": 100360}
{"lr": 0.001080803813843059, "data_time": 0.003198695182800293, "grad_norm": 0.3735703259706497, "loss": 0.46121010184288025, "time": 1.5731530904769897, "epoch": 321, "memory": 35394, "step": 100460}
{"lr": 0.0010797142648879036, "data_time": 0.6164714336395264, "grad_norm": 0.3692223891615868, "loss": 0.46988552510738374, "time": 1.6094616174697876, "epoch": 322, "memory": 35394, "step": 100573}
{"lr": 0.001078749568927524, "data_time": 0.32594273090362547, "grad_norm": 0.3868699580430984, "loss": 0.4668603211641312, "time": 1.5103492259979248, "epoch": 322, "memory": 35394, "step": 100673}
{"lr": 0.0010777844113988068, "data_time": 0.1432788848876953, "grad_norm": 0.37165028154850005, "loss": 0.46363005936145785, "time": 1.8281217098236084, "epoch": 322, "memory": 35394, "step": 100773}
{"lr": 0.001076693229923621, "data_time": 0.5379418849945068, "grad_norm": 0.3448874205350876, "loss": 0.46631833016872404, "time": 1.5531907558441163, "epoch": 323, "memory": 35394, "step": 100886}
{"lr": 0.0010757270945826855, "data_time": 0.33695809841156005, "grad_norm": 0.34327917993068696, "loss": 0.47041788697242737, "time": 1.578798532485962, "epoch": 323, "memory": 35394, "step": 100986}
{"lr": 0.001074760502678186, "data_time": 0.0021294116973876952, "grad_norm": 0.3591412156820297, "loss": 0.4660771578550339, "time": 1.5364790439605713, "epoch": 323, "memory": 35394, "step": 101086}
{"lr": 0.0010736677063843418, "data_time": 0.602454948425293, "grad_norm": 0.35496382117271424, "loss": 0.4665955245494843, "time": 1.5889622926712037, "epoch": 324, "memory": 35394, "step": 101199}
{"lr": 0.0010727001473356977, "data_time": 0.2588163137435913, "grad_norm": 0.42187689542770385, "loss": 0.4650532752275467, "time": 1.469957709312439, "epoch": 324, "memory": 35394, "step": 101299}
{"lr": 0.00107173213673567, "data_time": 0.003062796592712402, "grad_norm": 0.4050401300191879, "loss": 0.46602670252323153, "time": 1.5490163564682007, "epoch": 324, "memory": 35394, "step": 101399}
{"lr": 0.0010706377433507344, "data_time": 0.30101947784423827, "grad_norm": 0.3814901620149612, "loss": 0.4620720386505127, "time": 1.6124795436859132, "epoch": 325, "memory": 35394, "step": 101512}
{"lr": 0.0010696687762903265, "data_time": 0.060616588592529295, "grad_norm": 0.4306986540555954, "loss": 0.4646846175193787, "time": 1.58689124584198, "epoch": 325, "memory": 35394, "step": 101612}
{"lr": 0.0010686993626980385, "data_time": 0.003247380256652832, "grad_norm": 0.38934452533721925, "loss": 0.46244565546512606, "time": 1.5524820804595947, "epoch": 325, "memory": 35394, "step": 101712}
{"lr": 0.0010676033899754923, "data_time": 0.48831782341003416, "grad_norm": 0.38516268134117126, "loss": 0.45732439756393434, "time": 1.578361201286316, "epoch": 326, "memory": 35394, "step": 101825}
{"lr": 0.0010666330306221047, "data_time": 0.10199949741363526, "grad_norm": 0.4243548035621643, "loss": 0.4672022372484207, "time": 1.57093346118927, "epoch": 326, "memory": 35394, "step": 101925}
{"lr": 0.0010656622297635866, "data_time": 0.0034598350524902345, "grad_norm": 0.37963709980249405, "loss": 0.46140505075454713, "time": 1.459763479232788, "epoch": 326, "memory": 35394, "step": 102025}
{"lr": 0.0010645646954825223, "data_time": 0.49443774223327636, "grad_norm": 0.3762013614177704, "loss": 0.4563371539115906, "time": 1.5123244285583497, "epoch": 327, "memory": 35394, "step": 102138}
{"lr": 0.0010635929595775252, "data_time": 0.25528154373168943, "grad_norm": 0.4082562178373337, "loss": 0.4615627735853195, "time": 1.585946536064148, "epoch": 327, "memory": 35394, "step": 102238}
{"lr": 0.0010626207872013094, "data_time": 0.0028333425521850585, "grad_norm": 0.4243848085403442, "loss": 0.4698286533355713, "time": 1.5577751874923706, "epoch": 327, "memory": 35394, "step": 102338}
{"lr": 0.001061521709166159, "data_time": 0.5525863885879516, "grad_norm": 0.3948660731315613, "loss": 0.4704106032848358, "time": 1.533701753616333, "epoch": 328, "memory": 35394, "step": 102451}
{"lr": 0.0010605486124732543, "data_time": 0.3099102258682251, "grad_norm": 0.3627870470285416, "loss": 0.45612179636955263, "time": 1.5192505836486816, "epoch": 328, "memory": 35394, "step": 102551}
{"lr": 0.0010595750843501225, "data_time": 0.0038123846054077147, "grad_norm": 0.3910427808761597, "loss": 0.46681106388568877, "time": 1.5803073167800903, "epoch": 328, "memory": 35394, "step": 102651}
{"lr": 0.0010584744803903549, "data_time": 0.3195213317871094, "grad_norm": 0.40896145850419996, "loss": 0.46973631978034974, "time": 1.5332972764968873, "epoch": 329, "memory": 35394, "step": 102764}
{"lr": 0.001057500038695318, "data_time": 0.1423097848892212, "grad_norm": 0.4199318528175354, "loss": 0.46433394253253935, "time": 1.6233702659606934, "epoch": 329, "memory": 35394, "step": 102864}
{"lr": 0.0010565251706180453, "data_time": 0.004013872146606446, "grad_norm": 0.379483063519001, "loss": 0.4677238047122955, "time": 1.5524043560028076, "epoch": 329, "memory": 35394, "step": 102964}
{"lr": 0.0010554230585878897, "data_time": 0.2730687141418457, "grad_norm": 0.3578435957431793, "loss": 0.4597177982330322, "time": 1.528912901878357, "epoch": 330, "memory": 35394, "step": 103077}
{"lr": 0.0010544472876983183, "data_time": 0.002466583251953125, "grad_norm": 0.3673362761735916, "loss": 0.46414364576339723, "time": 1.576320481300354, "epoch": 330, "memory": 35394, "step": 103177}
{"lr": 0.001053471095481417, "data_time": 0.0026270151138305664, "grad_norm": 0.4102741479873657, "loss": 0.47091374099254607, "time": 1.5257390975952148, "epoch": 330, "memory": 35394, "step": 103277}
{"lr": 0.0010523674932595613, "data_time": 0.6220753908157348, "grad_norm": 0.3618732988834381, "loss": 0.46602738201618193, "time": 1.564963173866272, "epoch": 331, "memory": 35394, "step": 103390}
{"lr": 0.0010513904090046129, "data_time": 0.19743747711181642, "grad_norm": 0.42470532953739165, "loss": 0.4589834868907928, "time": 1.5249481916427612, "epoch": 331, "memory": 35394, "step": 103490}
{"lr": 0.0010504129084840763, "data_time": 0.011739182472229003, "grad_norm": 0.37284826636314394, "loss": 0.4624605268239975, "time": 1.57780978679657, "epoch": 331, "memory": 35394, "step": 103590}
{"lr": 0.001049307833973387, "data_time": 0.4412529706954956, "grad_norm": 0.3568493723869324, "loss": 0.46063418686389923, "time": 1.5498072147369384, "epoch": 332, "memory": 35394, "step": 103703}
{"lr": 0.0010483294522035246, "data_time": 0.16139740943908693, "grad_norm": 0.33610638976097107, "loss": 0.4658888399600983, "time": 1.5857145309448242, "epoch": 332, "memory": 35394, "step": 103803}
{"lr": 0.0010473506592365673, "data_time": 0.2103428363800049, "grad_norm": 0.43291020691394805, "loss": 0.45428032279014585, "time": 1.5649926900863647, "epoch": 332, "memory": 35394, "step": 103903}
{"lr": 0.0010462441303637894, "data_time": 0.6027519464492798, "grad_norm": 0.3813130110502243, "loss": 0.46278384029865266, "time": 1.5578605651855468, "epoch": 333, "memory": 35394, "step": 104016}
{"lr": 0.0010452644669505246, "data_time": 0.28741159439086916, "grad_norm": 0.33601692914962766, "loss": 0.4690555900335312, "time": 1.5802296638488769, "epoch": 333, "memory": 35394, "step": 104116}
{"lr": 0.0010442843974153308, "data_time": 0.002258157730102539, "grad_norm": 0.40541493594646455, "loss": 0.46229898631572724, "time": 1.5156422853469849, "epoch": 333, "memory": 35394, "step": 104216}
{"lr": 0.0010431764321308082, "data_time": 0.5887808561325073, "grad_norm": 0.39292977154254916, "loss": 0.46119981408119204, "time": 1.5277330875396729, "epoch": 334, "memory": 35394, "step": 104329}
{"lr": 0.001042195502966443, "data_time": 0.25780954360961916, "grad_norm": 0.34963718950748446, "loss": 0.46493537724018097, "time": 1.5590952157974243, "epoch": 334, "memory": 35394, "step": 104429}
{"lr": 0.0010412141727619046, "data_time": 0.002875828742980957, "grad_norm": 0.4186481773853302, "loss": 0.4597256362438202, "time": 1.5562276363372802, "epoch": 334, "memory": 35394, "step": 104529}
{"lr": 0.0010401047890392788, "data_time": 0.45713186264038086, "grad_norm": 0.41126432418823244, "loss": 0.46926847100257874, "time": 1.5303464889526368, "epoch": 335, "memory": 35394, "step": 104642}
{"lr": 0.0010391226100366537, "data_time": 0.06174843311309815, "grad_norm": 0.4046615660190582, "loss": 0.46662019193172455, "time": 1.2373833417892457, "epoch": 335, "memory": 35394, "step": 104742}
{"lr": 0.0010381400350821125, "data_time": 0.002806258201599121, "grad_norm": 0.43601652681827546, "loss": 0.45707003176212313, "time": 1.505706262588501, "epoch": 335, "memory": 35394, "step": 104842}
{"lr": 0.0010370292509180364, "data_time": 0.5388127326965332, "grad_norm": 0.3851629912853241, "loss": 0.46516062021255494, "time": 1.6119693756103515, "epoch": 336, "memory": 35394, "step": 104955}
{"lr": 0.001036045838010261, "data_time": 0.002202415466308594, "grad_norm": 0.4423207700252533, "loss": 0.46594800651073454, "time": 1.5065721988677978, "epoch": 336, "memory": 35394, "step": 105055}
{"lr": 0.001035062034245253, "data_time": 0.002547621726989746, "grad_norm": 0.40462978184223175, "loss": 0.461365669965744, "time": 1.5050801038742065, "epoch": 336, "memory": 35394, "step": 105155}
{"lr": 0.0010339498676590966, "data_time": 0.40437228679656984, "grad_norm": 0.3859026819467545, "loss": 0.4694820880889893, "time": 1.508707857131958, "epoch": 337, "memory": 35394, "step": 105268}
{"lr": 0.0010329652367992962, "data_time": 0.06195812225341797, "grad_norm": 0.3961109399795532, "loss": 0.46793548166751864, "time": 1.5889784812927246, "epoch": 337, "memory": 35394, "step": 105368}
{"lr": 0.0010319802201832877, "data_time": 0.004070281982421875, "grad_norm": 0.363930182158947, "loss": 0.46462478339672086, "time": 1.520976686477661, "epoch": 337, "memory": 35394, "step": 105468}
{"lr": 0.0010308666892168505, "data_time": 0.4849647045135498, "grad_norm": 0.41616474390029906, "loss": 0.4614913076162338, "time": 1.5639700174331665, "epoch": 338, "memory": 35394, "step": 105581}
{"lr": 0.0010298808563779147, "data_time": 0.15137991905212403, "grad_norm": 0.37693614065647124, "loss": 0.4644263207912445, "time": 1.5333890914916992, "epoch": 338, "memory": 35394, "step": 105681}
{"lr": 0.0010288946428900505, "data_time": 0.003957486152648926, "grad_norm": 0.3986580103635788, "loss": 0.46289565563201907, "time": 1.5650322437286377, "epoch": 338, "memory": 35394, "step": 105781}
{"lr": 0.0010277797656072611, "data_time": 0.4089135885238647, "grad_norm": 0.43920776844024656, "loss": 0.4688908040523529, "time": 1.4772510290145875, "epoch": 339, "memory": 35394, "step": 105894}
{"lr": 0.0010267927467815714, "data_time": 0.1116481065750122, "grad_norm": 0.42599636912345884, "loss": 0.4638500541448593, "time": 1.4941771268844604, "epoch": 339, "memory": 35394, "step": 105994}
{"lr": 0.0010258053524204106, "data_time": 0.0035663604736328124, "grad_norm": 0.44380304515361785, "loss": 0.4628898352384567, "time": 1.2394461393356324, "epoch": 339, "memory": 35394, "step": 106094}
{"lr": 0.0010246891469070422, "data_time": 0.45356526374816897, "grad_norm": 0.41261995434761045, "loss": 0.46047812700271606, "time": 1.4920245170593263, "epoch": 340, "memory": 35394, "step": 106207}
{"lr": 0.0010237009581062263, "data_time": 0.18730523586273193, "grad_norm": 0.4187063932418823, "loss": 0.4710661381483078, "time": 1.5596744298934937, "epoch": 340, "memory": 35394, "step": 106307}
{"lr": 0.0010227123988894862, "data_time": 0.0026226043701171875, "grad_norm": 0.42411749958992007, "loss": 0.46275998950004577, "time": 1.5604679346084596, "epoch": 340, "memory": 35394, "step": 106407}
{"lr": 0.0010215948832528537, "data_time": 0.5508625030517578, "grad_norm": 0.39628576338291166, "loss": 0.4637805938720703, "time": 1.517643189430237, "epoch": 341, "memory": 35394, "step": 106520}
{"lr": 0.0010206055405075154, "data_time": 0.13622517585754396, "grad_norm": 0.3853527903556824, "loss": 0.4637556612491608, "time": 1.5267149686813355, "epoch": 341, "memory": 35394, "step": 106620}
{"lr": 0.0010196158324718096, "data_time": 0.0026435136795043947, "grad_norm": 0.5014408200979232, "loss": 0.4701476961374283, "time": 1.4971852064132691, "epoch": 341, "memory": 35394, "step": 106720}
{"lr": 0.0010184970248404836, "data_time": 0.5551523208618164, "grad_norm": 0.34449985027313235, "loss": 0.46357238590717315, "time": 1.4994813680648804, "epoch": 342, "memory": 35394, "step": 106833}
{"lr": 0.0010175065441999432, "data_time": 0.47304158210754393, "grad_norm": 0.4349937468767166, "loss": 0.4663950353860855, "time": 1.517259693145752, "epoch": 342, "memory": 35394, "step": 106933}
{"lr": 0.00101651570340052, "data_time": 0.16079275608062743, "grad_norm": 0.42742009460926056, "loss": 0.4608080625534058, "time": 1.5140763521194458, "epoch": 342, "memory": 35394, "step": 107033}
{"lr": 0.0010153956219240308, "data_time": 0.4967890739440918, "grad_norm": 0.44153530299663546, "loss": 0.4671074658632278, "time": 1.4906818151474, "epoch": 343, "memory": 35394, "step": 107146}
{"lr": 0.0010144040194560748, "data_time": 0.43166394233703614, "grad_norm": 0.3920616149902344, "loss": 0.4564447462558746, "time": 1.506558108329773, "epoch": 343, "memory": 35394, "step": 107246}
{"lr": 0.0010134120619665577, "data_time": 0.30141401290893555, "grad_norm": 0.42182809114456177, "loss": 0.46618005633354187, "time": 1.5034165382385254, "epoch": 343, "memory": 35394, "step": 107346}
{"lr": 0.0010122907248150976, "data_time": 0.5537262201309204, "grad_norm": 0.470203098654747, "loss": 0.4632301479578018, "time": 1.5309632301330567, "epoch": 344, "memory": 35394, "step": 107459}
{"lr": 0.0010112980166057058, "data_time": 0.0773686408996582, "grad_norm": 0.41266492009162903, "loss": 0.4601149022579193, "time": 1.5392925262451171, "epoch": 344, "memory": 35394, "step": 107559}
{"lr": 0.001010304958517838, "data_time": 0.0035264968872070314, "grad_norm": 0.41622351109981537, "loss": 0.4614451080560684, "time": 1.5070173263549804, "epoch": 344, "memory": 35394, "step": 107659}
{"lr": 0.001009182383881975, "data_time": 0.5887540102005004, "grad_norm": 0.4226402372121811, "loss": 0.46251568794250486, "time": 1.5316792488098145, "epoch": 345, "memory": 35394, "step": 107772}
{"lr": 0.0010081885860350652, "data_time": 0.2137133836746216, "grad_norm": 0.39506767094135287, "loss": 0.4670632898807526, "time": 1.537192463874817, "epoch": 345, "memory": 35394, "step": 107872}
{"lr": 0.0010071944434584387, "data_time": 0.002907848358154297, "grad_norm": 0.42678258419036863, "loss": 0.4636688530445099, "time": 1.511652660369873, "epoch": 345, "memory": 35394, "step": 107972}
{"lr": 0.0010060706495488084, "data_time": 0.44634099006652833, "grad_norm": 0.3891874134540558, "loss": 0.45979406237602233, "time": 1.5024191856384277, "epoch": 346, "memory": 35394, "step": 108085}
{"lr": 0.0010050757781859747, "data_time": 0.013733601570129395, "grad_norm": 0.43500022292137147, "loss": 0.46099613010883334, "time": 1.5773317575454713, "epoch": 346, "memory": 35394, "step": 108185}
{"lr": 0.0010040805672477777, "data_time": 0.002927422523498535, "grad_norm": 0.39663057327270507, "loss": 0.4584677666425705, "time": 1.5148257255554198, "epoch": 346, "memory": 35394, "step": 108285}
{"lr": 0.0010029555722947993, "data_time": 0.4470334529876709, "grad_norm": 0.43977331519126894, "loss": 0.4698309987783432, "time": 1.5575164556503296, "epoch": 347, "memory": 35394, "step": 108398}
{"lr": 0.001001959643555052, "data_time": 0.2850341796875, "grad_norm": 0.3911417692899704, "loss": 0.4643795907497406, "time": 1.5249966859817505, "epoch": 347, "memory": 35394, "step": 108498}
{"lr": 0.001000963380399804, "data_time": 0.08961379528045654, "grad_norm": 0.436871612071991, "loss": 0.4681200861930847, "time": 1.6012983083724976, "epoch": 347, "memory": 35394, "step": 108598}
{"lr": 0.000999837202653378, "data_time": 0.3452359437942505, "grad_norm": 0.38606224209070206, "loss": 0.4690538585186005, "time": 1.5627412796020508, "epoch": 348, "memory": 35394, "step": 108711}
{"lr": 0.0009988402326928793, "data_time": 0.002572965621948242, "grad_norm": 0.3976347088813782, "loss": 0.4631569653749466, "time": 1.2354783058166503, "epoch": 348, "memory": 35394, "step": 108811}
{"lr": 0.0009978429334821656, "data_time": 0.0036057472229003907, "grad_norm": 0.3528527557849884, "loss": 0.4612824022769928, "time": 1.5918197631835938, "epoch": 348, "memory": 35394, "step": 108911}
{"lr": 0.000996715591211383, "data_time": 0.5464663505554199, "grad_norm": 0.39486355185508726, "loss": 0.45524639189243316, "time": 1.532730221748352, "epoch": 349, "memory": 35394, "step": 109024}
{"lr": 0.0009957175962031872, "data_time": 0.07186152935028076, "grad_norm": 0.4718743622303009, "loss": 0.46450090408325195, "time": 1.5724457025527954, "epoch": 349, "memory": 35394, "step": 109124}
{"lr": 0.0009947192771154031, "data_time": 0.0031244754791259766, "grad_norm": 0.4659013211727142, "loss": 0.4647637516260147, "time": 1.5665632963180542, "epoch": 349, "memory": 35394, "step": 109224}
{"lr": 0.0009935907886082427, "data_time": 0.4863803148269653, "grad_norm": 0.37737143635749815, "loss": 0.4595455199480057, "time": 1.5205309391021729, "epoch": 350, "memory": 35394, "step": 109337}
{"lr": 0.0009925917847420323, "data_time": 0.32414224147796633, "grad_norm": 0.44081132113933563, "loss": 0.4694781988859177, "time": 1.5927094221115112, "epoch": 350, "memory": 35394, "step": 109437}
{"lr": 0.000991592461972119, "data_time": 0.0032246589660644533, "grad_norm": 0.4535121709108353, "loss": 0.46292534172534944, "time": 1.5263609409332275, "epoch": 350, "memory": 35394, "step": 109537}
{"lr": 0.0009904628455351578, "data_time": 0.3630881071090698, "grad_norm": 0.410311171412468, "loss": 0.46733447909355164, "time": 1.5123531341552734, "epoch": 351, "memory": 35394, "step": 109650}
{"lr": 0.0009894628490169813, "data_time": 0.12723100185394287, "grad_norm": 0.4437586337327957, "loss": 0.4630185455083847, "time": 1.549399733543396, "epoch": 351, "memory": 35394, "step": 109750}
{"lr": 0.0009884625387761583, "data_time": 0.0032100677490234375, "grad_norm": 0.3950934410095215, "loss": 0.4572657734155655, "time": 1.5112143754959106, "epoch": 351, "memory": 35394, "step": 109850}
{"lr": 0.0009873318127342652, "data_time": 0.5019667148590088, "grad_norm": 0.4123047798871994, "loss": 0.4723826557397842, "time": 1.5341668367385863, "epoch": 352, "memory": 35394, "step": 109963}
{"lr": 0.0009863308397862732, "data_time": 0.30541889667510985, "grad_norm": 0.42828263342380524, "loss": 0.45630755424499514, "time": 1.5559988498687745, "epoch": 352, "memory": 35394, "step": 110063}
{"lr": 0.0009853295583017816, "data_time": 0.003201937675476074, "grad_norm": 0.4472426027059555, "loss": 0.46601228713989257, "time": 1.2253069400787353, "epoch": 352, "memory": 35394, "step": 110163}
{"lr": 0.0009841977409978304, "data_time": 0.27082808017730714, "grad_norm": 0.4231796860694885, "loss": 0.46326313316822054, "time": 1.5333453178405763, "epoch": 353, "memory": 35394, "step": 110276}
{"lr": 0.0009831958078580143, "data_time": 0.00247037410736084, "grad_norm": 0.4011797964572906, "loss": 0.46677531898021696, "time": 1.5305160522460937, "epoch": 353, "memory": 35394, "step": 110376}
{"lr": 0.0009821935713728502, "data_time": 0.003557848930358887, "grad_norm": 0.35786429047584534, "loss": 0.46224203407764436, "time": 1.6783869981765747, "epoch": 353, "memory": 35394, "step": 110476}
{"lr": 0.0009810606811674176, "data_time": 0.4599827527999878, "grad_norm": 0.3715215355157852, "loss": 0.46618525087833407, "time": 1.5425096511840821, "epoch": 354, "memory": 35394, "step": 110589}
{"lr": 0.0009800578040893414, "data_time": 0.15359623432159425, "grad_norm": 0.4021566301584244, "loss": 0.46463834345340727, "time": 1.535776400566101, "epoch": 354, "memory": 35394, "step": 110689}
{"lr": 0.0009790546288619907, "data_time": 0.0025118112564086912, "grad_norm": 0.41213064789772036, "loss": 0.46645379066467285, "time": 1.677100133895874, "epoch": 354, "memory": 35394, "step": 110789}
{"lr": 0.0009779206841330546, "data_time": 0.3227936506271362, "grad_norm": 0.4030286967754364, "loss": 0.45962908267974856, "time": 1.5920029640197755, "epoch": 355, "memory": 35394, "step": 110902}
{"lr": 0.0009769168793856006, "data_time": 0.002009749412536621, "grad_norm": 0.42634175419807435, "loss": 0.4660676270723343, "time": 1.4970984935760498, "epoch": 355, "memory": 35394, "step": 111002}
{"lr": 0.0009759127816897859, "data_time": 0.0028206348419189454, "grad_norm": 0.4021492063999176, "loss": 0.4659657508134842, "time": 1.4983699083328248, "epoch": 355, "memory": 35394, "step": 111102}
{"lr": 0.0009747778008324341, "data_time": 0.4334993839263916, "grad_norm": 0.4691382199525833, "loss": 0.4608391374349594, "time": 1.5600953578948975, "epoch": 356, "memory": 35394, "step": 111215}
{"lr": 0.0009737730846995323, "data_time": 0.09159655570983886, "grad_norm": 0.4559639126062393, "loss": 0.45822775959968565, "time": 1.5257214069366456, "epoch": 356, "memory": 35394, "step": 111315}
{"lr": 0.0009727680808239297, "data_time": 0.0022292852401733397, "grad_norm": 0.4049458473920822, "loss": 0.4630526632070541, "time": 1.5862228870391846, "epoch": 356, "memory": 35394, "step": 111415}
{"lr": 0.000971632082250057, "data_time": 0.3992783784866333, "grad_norm": 0.449620795249939, "loss": 0.46332328021526337, "time": 1.543058156967163, "epoch": 357, "memory": 35394, "step": 111528}
{"lr": 0.0009706264710304203, "data_time": 0.018266129493713378, "grad_norm": 0.48643783628940584, "loss": 0.46243923902511597, "time": 1.638697338104248, "epoch": 357, "memory": 35394, "step": 111628}
{"lr": 0.0009696205772784128, "data_time": 0.002766323089599609, "grad_norm": 0.46353605687618255, "loss": 0.464715039730072, "time": 1.472697949409485, "epoch": 357, "memory": 35394, "step": 111728}
{"lr": 0.0009684835794164235, "data_time": 0.5188203811645508, "grad_norm": 0.47053403258323667, "loss": 0.460000804066658, "time": 1.5335213661193847, "epoch": 358, "memory": 35394, "step": 111841}
{"lr": 0.0009674770894232855, "data_time": 0.07699522972106934, "grad_norm": 0.3557462811470032, "loss": 0.4607481062412262, "time": 1.52695734500885, "epoch": 358, "memory": 35394, "step": 111941}
{"lr": 0.000966470322112689, "data_time": 0.002537655830383301, "grad_norm": 0.40544925928115844, "loss": 0.4608732134103775, "time": 1.5119462490081788, "epoch": 358, "memory": 35394, "step": 112041}
{"lr": 0.0009653323434071991, "data_time": 0.1416222333908081, "grad_norm": 0.4422329246997833, "loss": 0.46524362862110136, "time": 1.229326605796814, "epoch": 359, "memory": 35394, "step": 112154}
{"lr": 0.0009643249909680493, "data_time": 0.16342618465423583, "grad_norm": 0.386290642619133, "loss": 0.45319516956806183, "time": 1.545706820487976, "epoch": 359, "memory": 35394, "step": 112254}
{"lr": 0.0009633173664308548, "data_time": 0.003398489952087402, "grad_norm": 0.3939687967300415, "loss": 0.46104246079921724, "time": 1.5440425157546998, "epoch": 359, "memory": 35394, "step": 112354}
{"lr": 0.0009621784253423903, "data_time": 0.5748590946197509, "grad_norm": 0.43907013833522796, "loss": 0.4623216539621353, "time": 1.5132676839828492, "epoch": 360, "memory": 35394, "step": 112467}
{"lr": 0.0009611702267987099, "data_time": 0.3100965261459351, "grad_norm": 0.4605975568294525, "loss": 0.460000079870224, "time": 1.4993310928344727, "epoch": 360, "memory": 35394, "step": 112567}
{"lr": 0.0009601617613808094, "data_time": 0.08519535064697266, "grad_norm": 0.4086146503686905, "loss": 0.4638223111629486, "time": 1.5317733526229858, "epoch": 360, "memory": 35394, "step": 112667}
{"lr": 0.0009590218763855098, "data_time": 0.30964634418487547, "grad_norm": 0.41502260118722917, "loss": 0.46557595431804655, "time": 1.4730817556381226, "epoch": 361, "memory": 35394, "step": 112780}
{"lr": 0.0009580128480925043, "data_time": 0.24091358184814454, "grad_norm": 0.4369313418865204, "loss": 0.46158032715320585, "time": 1.6308483123779296, "epoch": 361, "memory": 35394, "step": 112880}
{"lr": 0.0009570035581534336, "data_time": 0.0029527902603149413, "grad_norm": 0.4194801390171051, "loss": 0.4660779356956482, "time": 1.523227047920227, "epoch": 361, "memory": 35394, "step": 112980}
{"lr": 0.0009558627477427502, "data_time": 0.4365326642990112, "grad_norm": 0.39006551504135134, "loss": 0.4612432777881622, "time": 1.572304654121399, "epoch": 362, "memory": 35394, "step": 113093}
{"lr": 0.0009548529060690846, "data_time": 0.2449549913406372, "grad_norm": 0.4170659989118576, "loss": 0.45871658623218536, "time": 1.4773321866989135, "epoch": 362, "memory": 35394, "step": 113193}
{"lr": 0.0009538428079817551, "data_time": 0.002709197998046875, "grad_norm": 0.40036257803440095, "loss": 0.4577573299407959, "time": 1.501806640625, "epoch": 362, "memory": 35394, "step": 113293}
{"lr": 0.0009527010906621489, "data_time": 0.46948790550231934, "grad_norm": 0.37626729905605316, "loss": 0.4702855497598648, "time": 1.5314930438995362, "epoch": 363, "memory": 35394, "step": 113406}
{"lr": 0.0009516904519896855, "data_time": 0.00220637321472168, "grad_norm": 0.4299414485692978, "loss": 0.4601961553096771, "time": 1.3150890588760376, "epoch": 363, "memory": 35394, "step": 113506}
{"lr": 0.0009506795621401183, "data_time": 0.002593541145324707, "grad_norm": 0.45425272881984713, "loss": 0.46639522910118103, "time": 1.5396605253219604, "epoch": 363, "memory": 35394, "step": 113606}
{"lr": 0.0009495369564327657, "data_time": 0.3693836212158203, "grad_norm": 0.3681346356868744, "loss": 0.46408439576625826, "time": 1.5644068956375121, "epoch": 364, "memory": 35394, "step": 113719}
{"lr": 0.000948525537156295, "data_time": 0.0851172924041748, "grad_norm": 0.4182677060365677, "loss": 0.4640327751636505, "time": 1.5614375591278076, "epoch": 364, "memory": 35394, "step": 113819}
{"lr": 0.0009475138719433545, "data_time": 0.0032073020935058593, "grad_norm": 0.4019834280014038, "loss": 0.4695825606584549, "time": 1.5540603876113892, "epoch": 364, "memory": 35394, "step": 113919}
{"lr": 0.0009463703963838436, "data_time": 0.43075852394104003, "grad_norm": 0.4494440585374832, "loss": 0.4559011608362198, "time": 1.5144583225250243, "epoch": 365, "memory": 35394, "step": 114032}
{"lr": 0.0009453582129108194, "data_time": 0.42587862014770506, "grad_norm": 0.44662775099277496, "loss": 0.4646132797002792, "time": 1.562855339050293, "epoch": 365, "memory": 35394, "step": 114132}
{"lr": 0.0009443457887459478, "data_time": 0.0022249460220336915, "grad_norm": 0.3974826574325562, "loss": 0.4677015691995621, "time": 1.5120728969573975, "epoch": 365, "memory": 35394, "step": 114232}
{"lr": 0.0009432014618839749, "data_time": 0.5867465734481812, "grad_norm": 0.44486443102359774, "loss": 0.4619572520256042, "time": 1.5225666284561157, "epoch": 366, "memory": 35394, "step": 114345}
{"lr": 0.0009421885306342464, "data_time": 0.20824589729309081, "grad_norm": 0.44237333834171294, "loss": 0.46444631516933443, "time": 1.5040751934051513, "epoch": 366, "memory": 35394, "step": 114445}
{"lr": 0.0009411753639412, "data_time": 0.002844095230102539, "grad_norm": 0.4203830987215042, "loss": 0.4561803787946701, "time": 1.5969104290008544, "epoch": 366, "memory": 35394, "step": 114545}
{"lr": 0.000940030204340273, "data_time": 0.5717183351516724, "grad_norm": 0.47567669451236727, "loss": 0.4623889446258545, "time": 1.563325786590576, "epoch": 367, "memory": 35394, "step": 114658}
{"lr": 0.0009390165417458229, "data_time": 0.28254706859588624, "grad_norm": 0.4005605787038803, "loss": 0.46945696175098417, "time": 1.5186325788497925, "epoch": 367, "memory": 35394, "step": 114758}
{"lr": 0.0009380026489604007, "data_time": 0.002425217628479004, "grad_norm": 0.48489550650119784, "loss": 0.4685944586992264, "time": 1.5442379236221313, "epoch": 367, "memory": 35394, "step": 114858}
{"lr": 0.0009368566751975371, "data_time": 0.37133169174194336, "grad_norm": 0.4144106268882751, "loss": 0.46848849058151243, "time": 1.5465147018432617, "epoch": 368, "memory": 35394, "step": 114971}
{"lr": 0.0009358422977022078, "data_time": 0.12894973754882813, "grad_norm": 0.40040975213050845, "loss": 0.45853726863861083, "time": 1.5270414352416992, "epoch": 368, "memory": 35394, "step": 115071}
{"lr": 0.00093482769527199, "data_time": 0.003630232810974121, "grad_norm": 0.4402478188276291, "loss": 0.46405477821826935, "time": 1.536425232887268, "epoch": 368, "memory": 35394, "step": 115171}
{"lr": 0.0009336809259374136, "data_time": 0.39765219688415526, "grad_norm": 0.3929396092891693, "loss": 0.46522446572780607, "time": 1.5525787353515625, "epoch": 369, "memory": 35394, "step": 115284}
{"lr": 0.00093266584999665, "data_time": 0.11985177993774414, "grad_norm": 0.4447342544794083, "loss": 0.4658224493265152, "time": 1.5100635528564452, "epoch": 369, "memory": 35394, "step": 115384}
{"lr": 0.000931650554380728, "data_time": 0.0030443429946899413, "grad_norm": 0.4091942697763443, "loss": 0.46375539898872375, "time": 1.543971800804138, "epoch": 369, "memory": 35394, "step": 115484}
{"lr": 0.0009305030080775695, "data_time": 0.39283645153045654, "grad_norm": 0.39846491515636445, "loss": 0.46284792125225066, "time": 1.5958204030990601, "epoch": 370, "memory": 35394, "step": 115597}
{"lr": 0.0009294872501581428, "data_time": 0.2824340105056763, "grad_norm": 0.4492764472961426, "loss": 0.46736661195755, "time": 1.6215060949325562, "epoch": 370, "memory": 35394, "step": 115697}
{"lr": 0.0009284712778268515, "data_time": 0.0031549215316772463, "grad_norm": 0.41227970719337464, "loss": 0.4590795814990997, "time": 1.5401161193847657, "epoch": 370, "memory": 35394, "step": 115797}
{"lr": 0.0009273229731708443, "data_time": 0.4594427585601807, "grad_norm": 0.43597129732370377, "loss": 0.46072093546390536, "time": 1.5167099475860595, "epoch": 371, "memory": 35394, "step": 115910}
{"lr": 0.0009263065497505904, "data_time": 0.26187825202941895, "grad_norm": 0.4559649586677551, "loss": 0.4627935141324997, "time": 1.5480322599411012, "epoch": 371, "memory": 35394, "step": 116010}
{"lr": 0.0009252899171852447, "data_time": 0.0033138275146484377, "grad_norm": 0.47080630958080294, "loss": 0.46642602384090426, "time": 1.443002200126648, "epoch": 371, "memory": 35394, "step": 116110}
{"lr": 0.0009241408728044261, "data_time": 0.006382560729980469, "grad_norm": 0.4852666437625885, "loss": 0.46455039978027346, "time": 1.262477970123291, "epoch": 372, "memory": 35394, "step": 116223}
{"lr": 0.0009231238003719766, "data_time": 0.022012734413146974, "grad_norm": 0.4050399214029312, "loss": 0.4603954583406448, "time": 1.4891536235809326, "epoch": 372, "memory": 35394, "step": 116323}
{"lr": 0.0009221065240646009, "data_time": 0.003825545310974121, "grad_norm": 0.4294697821140289, "loss": 0.46426633894443514, "time": 1.5810230493545532, "epoch": 372, "memory": 35394, "step": 116423}
{"lr": 0.0009209567585990075, "data_time": 0.49153239727020265, "grad_norm": 0.4473871052265167, "loss": 0.46230230331420896, "time": 1.5589307308197022, "epoch": 373, "memory": 35394, "step": 116536}
{"lr": 0.0009199390536535218, "data_time": 0.21133155822753907, "grad_norm": 0.4095963388681412, "loss": 0.46514838337898257, "time": 1.5695565462112426, "epoch": 373, "memory": 35394, "step": 116636}
{"lr": 0.0009189211501065829, "data_time": 0.002240633964538574, "grad_norm": Infinity, "loss": 0.4653646260499954, "time": 1.5434551239013672, "epoch": 373, "memory": 35394, "step": 116736}
{"lr": 0.0009177706822079472, "data_time": 0.385371994972229, "grad_norm": 0.418432343006134, "loss": 0.46624964475631714, "time": 1.5400018215179443, "epoch": 374, "memory": 35394, "step": 116849}
{"lr": 0.0009167523612588472, "data_time": 0.04745519161224365, "grad_norm": 0.48629272282123565, "loss": 0.4635626584291458, "time": 1.548146390914917, "epoch": 374, "memory": 35394, "step": 116949}
{"lr": 0.0009157338469849886, "data_time": 0.0028185606002807616, "grad_norm": 0.4145901739597321, "loss": 0.4633753508329391, "time": 1.532832145690918, "epoch": 374, "memory": 35394, "step": 117049}
{"lr": 0.000914582695316441, "data_time": 0.4229182243347168, "grad_norm": Infinity, "loss": 0.46996581852436065, "time": 1.5440080165863037, "epoch": 375, "memory": 35394, "step": 117162}
{"lr": 0.0009135637748831406, "data_time": 0.2751803398132324, "grad_norm": 0.4476743310689926, "loss": 0.4651977866888046, "time": 1.6278005123138428, "epoch": 375, "memory": 35394, "step": 117262}
{"lr": 0.0009125446664049118, "data_time": 0.0021394014358520506, "grad_norm": 0.4663990378379822, "loss": 0.4574527233839035, "time": 1.6048825979232788, "epoch": 375, "memory": 35394, "step": 117362}
{"lr": 0.0009113928496406723, "data_time": 0.5244616985321044, "grad_norm": 0.42453104853630064, "loss": 0.46458604335784914, "time": 1.5903693914413453, "epoch": 376, "memory": 35394, "step": 117475}
{"lr": 0.0009103733462523092, "data_time": 0.17757041454315187, "grad_norm": 0.4077348172664642, "loss": 0.4585042804479599, "time": 1.571786093711853, "epoch": 376, "memory": 35394, "step": 117575}
{"lr": 0.0009093536601018988, "data_time": 0.00298919677734375, "grad_norm": 0.4362431228160858, "loss": 0.4640932410955429, "time": 1.5411760807037354, "epoch": 376, "memory": 35394, "step": 117675}
{"lr": 0.0009082011969269791, "data_time": 0.49874832630157473, "grad_norm": 0.4335537165403366, "loss": 0.4599980652332306, "time": 1.5650612354278564, "epoch": 377, "memory": 35394, "step": 117788}
{"lr": 0.0009071811271221501, "data_time": 0.07703912258148193, "grad_norm": 0.4732524871826172, "loss": 0.46274531483650205, "time": 1.594152021408081, "epoch": 377, "memory": 35394, "step": 117888}
{"lr": 0.0009061608798411197, "data_time": 0.0037622690200805665, "grad_norm": 0.46350246071815493, "loss": 0.4637869268655777, "time": 1.4955947399139404, "epoch": 377, "memory": 35394, "step": 117988}
{"lr": 0.0009050077889510168, "data_time": 0.6060759782791137, "grad_norm": 0.42340431809425355, "loss": 0.46408732831478117, "time": 1.5704133272171021, "epoch": 378, "memory": 35394, "step": 118101}
{"lr": 0.0009039871692775052, "data_time": 0.2080458402633667, "grad_norm": 0.48186841905117034, "loss": 0.4554749667644501, "time": 1.5669753789901733, "epoch": 378, "memory": 35394, "step": 118201}
{"lr": 0.0009029663774165166, "data_time": 0.004485940933227539, "grad_norm": 0.4289487063884735, "loss": 0.4621177315711975, "time": 1.5473581314086915, "epoch": 378, "memory": 35394, "step": 118301}
{"lr": 0.0009018126775169092, "data_time": 0.5017554521560669, "grad_norm": 0.38595977425575256, "loss": 0.45970491468906405, "time": 1.5506050825119018, "epoch": 379, "memory": 35394, "step": 118414}
{"lr": 0.0009007915245314191, "data_time": 0.09715468883514404, "grad_norm": 0.4592561453580856, "loss": 0.46262890100479126, "time": 1.5576029777526856, "epoch": 379, "memory": 35394, "step": 118514}
{"lr": 0.0008997702046499704, "data_time": 0.0027384519577026366, "grad_norm": 0.5438781559467316, "loss": 0.4654615193605423, "time": 1.556428599357605, "epoch": 379, "memory": 35394, "step": 118614}
{"lr": 0.0008986159144564183, "data_time": 0.621553921699524, "grad_norm": 0.3821278512477875, "loss": 0.46491359770298, "time": 1.5934943437576294, "epoch": 380, "memory": 35394, "step": 118727}
{"lr": 0.0008975942447243049, "data_time": 0.11217634677886963, "grad_norm": 0.40795351564884186, "loss": 0.4595035523176193, "time": 1.606296443939209, "epoch": 380, "memory": 35394, "step": 118827}
{"lr": 0.0008965724133904592, "data_time": 0.0024437904357910156, "grad_norm": 0.4356764853000641, "loss": 0.4575859040021896, "time": 1.544035792350769, "epoch": 380, "memory": 35394, "step": 118927}
{"lr": 0.000895417551628097, "data_time": 0.43964860439300535, "grad_norm": 0.44404339492321016, "loss": 0.45720855295658114, "time": 1.5509012699127198, "epoch": 381, "memory": 35394, "step": 119040}
{"lr": 0.0008943953817231, "data_time": 0.08373880386352539, "grad_norm": 0.4542281061410904, "loss": 0.45788887441158294, "time": 1.5681155681610108, "epoch": 381, "memory": 35394, "step": 119140}
{"lr": 0.0008933730555132172, "data_time": 0.0032169580459594726, "grad_norm": 0.43016034960746763, "loss": 0.45703906416893003, "time": 1.5633188486099243, "epoch": 381, "memory": 35394, "step": 119240}
{"lr": 0.0008922176409164525, "data_time": 0.5177857398986816, "grad_norm": 0.4524415820837021, "loss": 0.4612219572067261, "time": 1.5357144594192504, "epoch": 382, "memory": 35394, "step": 119353}
{"lr": 0.0008911949874204233, "data_time": 0.2564464330673218, "grad_norm": 0.43955869078636167, "loss": 0.4625212401151657, "time": 1.5608385801315308, "epoch": 382, "memory": 35394, "step": 119453}
{"lr": 0.0008901721829188909, "data_time": 0.002953910827636719, "grad_norm": 0.477693173289299, "loss": 0.4594775468111038, "time": 1.6115307331085205, "epoch": 382, "memory": 35394, "step": 119553}
{"lr": 0.000889016234231099, "data_time": 0.3731825590133667, "grad_norm": 0.44487647116184237, "loss": 0.4671077489852905, "time": 1.521703338623047, "epoch": 383, "memory": 35394, "step": 119666}
{"lr": 0.0008879931137337345, "data_time": 0.3123467922210693, "grad_norm": 0.43862987756729127, "loss": 0.46536961793899534, "time": 1.6019323348999024, "epoch": 383, "memory": 35394, "step": 119766}
{"lr": 0.0008869698475327009, "data_time": 0.009859538078308106, "grad_norm": 0.3990690976381302, "loss": 0.45579438507556913, "time": 1.5297170400619506, "epoch": 383, "memory": 35394, "step": 119866}
{"lr": 0.0008858133835059211, "data_time": 0.47659251689910886, "grad_norm": 0.42232891321182253, "loss": 0.45977403223514557, "time": 1.5547439575195312, "epoch": 384, "memory": 35394, "step": 119979}
{"lr": 0.0008847898126044963, "data_time": 0.2311382293701172, "grad_norm": 0.49991955161094664, "loss": 0.46614952981472013, "time": 1.6246432065963745, "epoch": 384, "memory": 35394, "step": 120079}
{"lr": 0.0008837661013035966, "data_time": 0.0032429218292236326, "grad_norm": 0.4695946276187897, "loss": 0.4696279913187027, "time": 1.5346534729003907, "epoch": 384, "memory": 35394, "step": 120179}
{"lr": 0.0008826091406982283, "data_time": 0.36138167381286623, "grad_norm": 0.3795508772134781, "loss": 0.46612468659877776, "time": 1.5588590145111083, "epoch": 385, "memory": 35394, "step": 120292}
{"lr": 0.0008815851359973201, "data_time": 0.14227206707000734, "grad_norm": 0.4836321473121643, "loss": 0.4630132794380188, "time": 1.569765305519104, "epoch": 385, "memory": 35394, "step": 120392}
{"lr": 0.0008805609962034115, "data_time": 0.0025652647018432617, "grad_norm": 0.4111705183982849, "loss": 0.46878543198108674, "time": 1.521510696411133, "epoch": 385, "memory": 35394, "step": 120492}
{"lr": 0.0008794035577879113, "data_time": 0.42061171531677244, "grad_norm": 0.41958979070186614, "loss": 0.4651415467262268, "time": 1.584146547317505, "epoch": 386, "memory": 35394, "step": 120605}
{"lr": 0.0008783791358991375, "data_time": 0.002393364906311035, "grad_norm": 0.4054224729537964, "loss": 0.47165962755680085, "time": 1.5500369310379027, "epoch": 386, "memory": 35394, "step": 120705}
{"lr": 0.0008773545842260293, "data_time": 0.002792072296142578, "grad_norm": 0.4801794499158859, "loss": 0.4660118281841278, "time": 1.5098511457443238, "epoch": 386, "memory": 35394, "step": 120805}
{"lr": 0.0008761966867766006, "data_time": 0.3048658609390259, "grad_norm": 0.4450051516294479, "loss": 0.46607237458229067, "time": 1.5359047412872315, "epoch": 387, "memory": 35394, "step": 120918}
{"lr": 0.0008751718643183455, "data_time": 0.04137868881225586, "grad_norm": 0.451106196641922, "loss": 0.46379286646842954, "time": 1.5973040819168092, "epoch": 387, "memory": 35394, "step": 121018}
{"lr": 0.0008741469173865261, "data_time": 0.0022756099700927735, "grad_norm": 0.43306600153446195, "loss": 0.45845346748828886, "time": 1.5350011348724366, "epoch": 387, "memory": 35394, "step": 121118}
{"lr": 0.0008729885796868232, "data_time": 0.6002744674682617, "grad_norm": 0.45083127617836, "loss": 0.4621830374002457, "time": 1.5876834630966186, "epoch": 388, "memory": 35394, "step": 121231}
{"lr": 0.0008719633732839709, "data_time": 1.3813973426818849, "grad_norm": 0.45908714830875397, "loss": 0.4603216141462326, "time": 3.131526255607605, "epoch": 388, "memory": 35394, "step": 121331}
{"lr": 0.0008709380477203434, "data_time": 0.00250096321105957, "grad_norm": 0.4171923816204071, "loss": 0.46786090135574343, "time": 1.5993709087371826, "epoch": 388, "memory": 35394, "step": 121431}
{"lr": 0.0008697792885611612, "data_time": 0.008459973335266113, "grad_norm": 0.505086076259613, "loss": 0.4647446841001511, "time": 1.657163381576538, "epoch": 389, "memory": 35394, "step": 121544}
{"lr": 0.0008687537148448205, "data_time": 0.002130866050720215, "grad_norm": 0.46378070712089536, "loss": 0.4619387358427048, "time": 1.5649109840393067, "epoch": 389, "memory": 35394, "step": 121644}
{"lr": 0.0008677280272824287, "data_time": 0.0030170440673828124, "grad_norm": 0.38474356234073637, "loss": 0.462424173951149, "time": 1.4576466798782348, "epoch": 389, "memory": 35394, "step": 121744}
{"lr": 0.0008665688654613975, "data_time": 0.41276872158050537, "grad_norm": 0.49795512557029725, "loss": 0.46629199385643005, "time": 1.5856497287750244, "epoch": 390, "memory": 35394, "step": 121857}
{"lr": 0.0008655429410686406, "data_time": 0.06110408306121826, "grad_norm": 0.5006564915180206, "loss": 0.46774343252182005, "time": 1.513093638420105, "epoch": 390, "memory": 35394, "step": 121957}
{"lr": 0.0008645169081464005, "data_time": 0.0028969287872314454, "grad_norm": 0.4334981024265289, "loss": 0.46543931365013125, "time": 1.4790306568145752, "epoch": 390, "memory": 35394, "step": 122057}
{"lr": 0.0008633573624676848, "data_time": 0.387064528465271, "grad_norm": 0.3670946225523949, "loss": 0.4644156962633133, "time": 1.5081714630126952, "epoch": 391, "memory": 35394, "step": 122170}
{"lr": 0.0008623311040412679, "data_time": 0.31280059814453126, "grad_norm": 0.4823043704032898, "loss": 0.46646581292152406, "time": 1.5401916742324828, "epoch": 391, "memory": 35394, "step": 122270}
{"lr": 0.0008613047424036982, "data_time": 0.5793584585189819, "grad_norm": Infinity, "loss": 0.46290005147457125, "time": 1.531881046295166, "epoch": 391, "memory": 35394, "step": 122370}
{"lr": 0.0008601448316776877, "data_time": 0.24515676498413086, "grad_norm": 0.44983174204826354, "loss": 0.4600799113512039, "time": 1.5288262844085694, "epoch": 392, "memory": 35394, "step": 122483}
{"lr": 0.0008591182558657887, "data_time": 0.002817106246948242, "grad_norm": 0.45698131918907164, "loss": 0.46058350801467896, "time": 1.4808709621429443, "epoch": 392, "memory": 35394, "step": 122583}
{"lr": 0.0008580915821627418, "data_time": 0.002854323387145996, "grad_norm": 0.4312639608979225, "loss": 0.4638236969709396, "time": 1.4790888786315919, "epoch": 392, "memory": 35394, "step": 122683}
{"lr": 0.0008569313252057501, "data_time": 0.4086392641067505, "grad_norm": 0.4466769814491272, "loss": 0.4604374974966049, "time": 1.4941306352615356, "epoch": 393, "memory": 35394, "step": 122796}
{"lr": 0.0008559044486616946, "data_time": 0.002521991729736328, "grad_norm": 0.4535581052303314, "loss": 0.4642729520797729, "time": 1.6149740934371948, "epoch": 393, "memory": 35394, "step": 122896}
{"lr": 0.0008548774795480848, "data_time": 0.0023515939712524412, "grad_norm": 0.4182357281446457, "loss": 0.4600472867488861, "time": 1.584617805480957, "epoch": 393, "memory": 35394, "step": 122996}
{"lr": 0.0008537168951820409, "data_time": 0.21658790111541748, "grad_norm": 0.43840753436088564, "loss": 0.457282093167305, "time": 1.4616015195846557, "epoch": 394, "memory": 35394, "step": 123109}
{"lr": 0.0008526897345640338, "data_time": 0.07592391967773438, "grad_norm": 0.42629809081554415, "loss": 0.4670282244682312, "time": 1.5150848388671876, "epoch": 394, "memory": 35394, "step": 123209}
{"lr": 0.0008516624866995666, "data_time": 0.0030652523040771485, "grad_norm": 0.4351537823677063, "loss": 0.4581620156764984, "time": 1.5466849088668824, "epoch": 394, "memory": 35394, "step": 123309}
{"lr": 0.0008505015937517103, "data_time": 0.4458014488220215, "grad_norm": 0.43133224844932555, "loss": 0.457870090007782, "time": 1.6764707803726195, "epoch": 395, "memory": 35394, "step": 123422}
{"lr": 0.0008494741657225635, "data_time": 0.00252680778503418, "grad_norm": 0.3968382716178894, "loss": 0.4676579892635345, "time": 1.5401338338851929, "epoch": 395, "memory": 35394, "step": 123522}
{"lr": 0.0008484466557714668, "data_time": 0.002787208557128906, "grad_norm": 0.5015817791223526, "loss": 0.4677349328994751, "time": 1.5645183086395265, "epoch": 395, "memory": 35394, "step": 123622}
{"lr": 0.0008472854730740446, "data_time": 0.44418723583221437, "grad_norm": 0.4396770834922791, "loss": 0.464572674036026, "time": 1.5055556058883668, "epoch": 396, "memory": 35394, "step": 123735}
{"lr": 0.0008462577943009102, "data_time": 0.19867920875549316, "grad_norm": 0.45125501453876493, "loss": 0.46256392598152163, "time": 1.527178692817688, "epoch": 396, "memory": 35394, "step": 123835}
{"lr": 0.0008452300389316638, "data_time": 0.002723884582519531, "grad_norm": 0.4330184906721115, "loss": 0.4590120166540146, "time": 1.523288631439209, "epoch": 396, "memory": 35394, "step": 123935}
{"lr": 0.0008440685853216227, "data_time": 0.559187388420105, "grad_norm": 0.442657607793808, "loss": 0.4642941981554031, "time": 1.4773359775543213, "epoch": 397, "memory": 35394, "step": 124048}
{"lr": 0.0008430406724757178, "data_time": 0.46306397914886477, "grad_norm": 0.48403152227401736, "loss": 0.471614745259285, "time": 1.4996608018875122, "epoch": 397, "memory": 35394, "step": 124148}
{"lr": 0.0008420126883607823, "data_time": 0.21747918128967286, "grad_norm": 0.4971638023853302, "loss": 0.46078563928604127, "time": 1.5178658723831178, "epoch": 397, "memory": 35394, "step": 124248}
{"lr": 0.0008408509826794643, "data_time": 0.2970097064971924, "grad_norm": 0.486805921792984, "loss": 0.46128281354904177, "time": 1.4457680940628053, "epoch": 398, "memory": 35394, "step": 124361}
{"lr": 0.0008398228524358056, "data_time": 0.048958420753479004, "grad_norm": 0.4107523947954178, "loss": 0.4602960407733917, "time": 1.5106592655181885, "epoch": 398, "memory": 35394, "step": 124461}
{"lr": 0.000838794656251352, "data_time": 0.0030339956283569336, "grad_norm": 0.5130128860473633, "loss": 0.46103507876396177, "time": 1.5265021085739137, "epoch": 398, "memory": 35394, "step": 124561}
{"lr": 0.0008376327173441871, "data_time": 0.2880099296569824, "grad_norm": 0.5112365305423736, "loss": 0.4594176709651947, "time": 1.5402940273284913, "epoch": 399, "memory": 35394, "step": 124674}
{"lr": 0.0008366043863813151, "data_time": 0.23318519592285156, "grad_norm": 0.49812488853931425, "loss": 0.4572181969881058, "time": 1.4949955701828004, "epoch": 399, "memory": 35394, "step": 124774}
{"lr": 0.0008355759948069567, "data_time": 0.03589231967926025, "grad_norm": 0.5209626525640487, "loss": 0.4624732404947281, "time": 1.527500820159912, "epoch": 399, "memory": 35394, "step": 124874}
{"lr": 0.0008344138415231589, "data_time": 0.4253562927246094, "grad_norm": 0.4561189204454422, "loss": 0.46318788528442384, "time": 1.5656606197357177, "epoch": 400, "memory": 35394, "step": 124987}
{"lr": 0.0008333853265228732, "data_time": 0.003357601165771484, "grad_norm": 0.3968700349330902, "loss": 0.46013380885124205, "time": 1.424237608909607, "epoch": 400, "memory": 35394, "step": 125087}
{"lr": 0.0008323567562413909, "data_time": 0.003936982154846192, "grad_norm": 0.4331464558839798, "loss": 0.4649818688631058, "time": 1.5884052991867066, "epoch": 400, "memory": 35394, "step": 125187}
{"lr": 0.0008311944074336524, "data_time": 0.16798853874206543, "grad_norm": 0.4240272670984268, "loss": 0.4564566284418106, "time": 1.5748732328414916, "epoch": 401, "memory": 35394, "step": 125300}
{"lr": 0.0008301657250807361, "data_time": 0.0024417877197265626, "grad_norm": 0.4517529636621475, "loss": 0.4645377486944199, "time": 1.962897539138794, "epoch": 401, "memory": 35394, "step": 125400}
{"lr": 0.0008291369927778114, "data_time": 0.0032598495483398436, "grad_norm": 0.44979611933231356, "loss": 0.462200465798378, "time": 1.5073214292526245, "epoch": 401, "memory": 35394, "step": 125500}
{"lr": 0.0008279744673019954, "data_time": 0.31903848648071287, "grad_norm": 0.4420652240514755, "loss": 0.46366200149059295, "time": 1.5104835510253907, "epoch": 402, "memory": 35394, "step": 125613}
{"lr": 0.000826945634283948, "data_time": 0.05228996276855469, "grad_norm": 0.5099644720554352, "loss": 0.4600056618452072, "time": 1.548583698272705, "epoch": 402, "memory": 35394, "step": 125713}
{"lr": 0.0008259167566478878, "data_time": 0.0035169363021850587, "grad_norm": 0.4784383147954941, "loss": 0.46603888273239136, "time": 1.4764370441436767, "epoch": 402, "memory": 35394, "step": 125813}
{"lr": 0.000824754073362724, "data_time": 0.5589809417724609, "grad_norm": 0.4873841255903244, "loss": 0.45028905272483827, "time": 1.5390839338302613, "epoch": 403, "memory": 35394, "step": 125926}
{"lr": 0.000823725106369487, "data_time": 0.27354633808135986, "grad_norm": 0.534558093547821, "loss": 0.45964729487895967, "time": 1.5419069290161134, "epoch": 403, "memory": 35394, "step": 126026}
{"lr": 0.0008226961000909582, "data_time": 0.0028319358825683594, "grad_norm": 0.4804656058549881, "loss": 0.4593138962984085, "time": 1.5410098075866698, "epoch": 403, "memory": 35394, "step": 126126}
{"lr": 0.0008215332778577368, "data_time": 0.30580706596374513, "grad_norm": 0.443744495511055, "loss": 0.46774115562438967, "time": 1.4995749473571778, "epoch": 404, "memory": 35394, "step": 126239}
{"lr": 0.0008205041935814273, "data_time": 0.014412283897399902, "grad_norm": 0.4833071932196617, "loss": 0.46040997803211214, "time": 1.588347029685974, "epoch": 404, "memory": 35394, "step": 126339}
{"lr": 0.0008194750753531851, "data_time": 0.004413390159606933, "grad_norm": 0.48660424947738645, "loss": 0.4668626576662064, "time": 1.5616748809814454, "epoch": 404, "memory": 35394, "step": 126439}
{"lr": 0.0008183121330354482, "data_time": 0.5502748489379883, "grad_norm": 0.4789677053689957, "loss": 0.46133041977882383, "time": 1.4836636066436768, "epoch": 405, "memory": 35394, "step": 126552}
{"lr": 0.0008172829481700842, "data_time": 0.3962429046630859, "grad_norm": 0.489095538854599, "loss": 0.46119188368320463, "time": 1.4906890153884889, "epoch": 405, "memory": 35394, "step": 126652}
{"lr": 0.0008162537346866963, "data_time": 0.003104710578918457, "grad_norm": 0.5365805894136428, "loss": 0.46608927845954895, "time": 1.5100425004959106, "epoch": 405, "memory": 35394, "step": 126752}
{"lr": 0.0008150906911499361, "data_time": 0.5027698278427124, "grad_norm": 0.43711506128311156, "loss": 0.46298975944519044, "time": 1.5826619863510132, "epoch": 406, "memory": 35394, "step": 126865}
{"lr": 0.000814061422391168, "data_time": 0.1912355899810791, "grad_norm": 0.5190009504556656, "loss": 0.46228228211402894, "time": 1.5077178955078125, "epoch": 406, "memory": 35394, "step": 126965}
{"lr": 0.0008130321303487507, "data_time": 0.0027934551239013673, "grad_norm": 0.4787935674190521, "loss": 0.4657295078039169, "time": 1.5529598236083983, "epoch": 406, "memory": 35394, "step": 127065}
{"lr": 0.0008118690044601016, "data_time": 0.46150104999542235, "grad_norm": 0.49225841760635375, "loss": 0.46557470262050626, "time": 1.549418306350708, "epoch": 407, "memory": 35394, "step": 127178}
{"lr": 0.0008108396685049414, "data_time": 0.141729998588562, "grad_norm": 0.4953133940696716, "loss": 0.4597805321216583, "time": 1.5857229471206664, "epoch": 407, "memory": 35394, "step": 127278}
{"lr": 0.0008098103146008827, "data_time": 0.0025978565216064455, "grad_norm": 0.46129357218742373, "loss": 0.4575733423233032, "time": 1.5131855964660645, "epoch": 407, "memory": 35394, "step": 127378}
{"lr": 0.0008086471252288134, "data_time": 0.2614386558532715, "grad_norm": 0.4500200033187866, "loss": 0.4621761113405228, "time": 1.4969885110855103, "epoch": 408, "memory": 35394, "step": 127491}
{"lr": 0.0008076177387753629, "data_time": 0.0020095109939575195, "grad_norm": 0.4845981180667877, "loss": 0.4624579131603241, "time": 1.5652259588241577, "epoch": 408, "memory": 35394, "step": 127591}
{"lr": 0.000806588339708056, "data_time": 0.0029127836227416993, "grad_norm": 0.5073816120624542, "loss": 0.4619143933057785, "time": 1.547095775604248, "epoch": 408, "memory": 35394, "step": 127691}
{"lr": 0.0008054251057220657, "data_time": 0.06957221031188965, "grad_norm": 0.472503525018692, "loss": 0.46430596709251404, "time": 1.2152522802352905, "epoch": 409, "memory": 35394, "step": 127804}
{"lr": 0.0008043956854692452, "data_time": 0.5442984104156494, "grad_norm": 0.5042347252368927, "loss": 0.4647949248552322, "time": 1.4845362663269044, "epoch": 409, "memory": 35394, "step": 127904}
{"lr": 0.0008033662579378153, "data_time": 0.3471859931945801, "grad_norm": 0.4684497654438019, "loss": 0.4614375650882721, "time": 1.5192158222198486, "epoch": 409, "memory": 35394, "step": 128004}
{"lr": 0.0008022029982081284, "data_time": 0.3926306009292603, "grad_norm": 0.47348963022232055, "loss": 0.47181536853313444, "time": 1.4832236289978027, "epoch": 410, "memory": 35394, "step": 128117}
{"lr": 0.000801173560855406, "data_time": 0.04929130077362061, "grad_norm": 0.45819922983646394, "loss": 0.4620432794094086, "time": 1.553200602531433, "epoch": 410, "memory": 35394, "step": 128217}
{"lr": 0.0008001441215594403, "data_time": 0.0030054330825805666, "grad_norm": 0.4923463732004166, "loss": 0.46810154616832733, "time": 1.499621534347534, "epoch": 410, "memory": 35394, "step": 128317}
{"lr": 0.0007989808549566964, "data_time": 0.535732102394104, "grad_norm": 0.4487036824226379, "loss": 0.46066679060459137, "time": 1.5063320398330688, "epoch": 411, "memory": 35394, "step": 128430}
{"lr": 0.0007979514172038192, "data_time": 0.002293252944946289, "grad_norm": 0.4250913292169571, "loss": 0.45933845043182375, "time": 1.2245362758636475, "epoch": 411, "memory": 35394, "step": 128530}
{"lr": 0.0007969219828430943, "data_time": 0.002560710906982422, "grad_norm": 0.4431359052658081, "loss": 0.46473979353904726, "time": 1.516541028022766, "epoch": 411, "memory": 35394, "step": 128630}
{"lr": 0.0007957587282380447, "data_time": 0.548944616317749, "grad_norm": 0.46401681303977965, "loss": 0.4605091392993927, "time": 1.5315635919570922, "epoch": 412, "memory": 35394, "step": 128743}
{"lr": 0.0007947293067847654, "data_time": 0.002972722053527832, "grad_norm": 0.46056637167930603, "loss": 0.4623925775289536, "time": 1.428454828262329, "epoch": 412, "memory": 35394, "step": 128843}
{"lr": 0.0007936998940589801, "data_time": 0.0031389474868774416, "grad_norm": 0.47572797238826753, "loss": 0.46513490080833436, "time": 1.5710363388061523, "epoch": 412, "memory": 35394, "step": 128943}
{"lr": 0.0007925366703221842, "data_time": 0.4964930057525635, "grad_norm": 0.49974803626537323, "loss": 0.45977613925933836, "time": 1.60146164894104, "epoch": 413, "memory": 35394, "step": 129056}
{"lr": 0.0007915072818679918, "data_time": 0.0437178373336792, "grad_norm": 0.4093714714050293, "loss": 0.4652826011180878, "time": 1.650307035446167, "epoch": 413, "memory": 35394, "step": 129156}
{"lr": 0.0007904779074764926, "data_time": 0.003552865982055664, "grad_norm": 0.43560094833374025, "loss": 0.46284129917621614, "time": 1.5217195510864259, "epoch": 413, "memory": 35394, "step": 129256}
{"lr": 0.0007893147334780068, "data_time": 0.4428139925003052, "grad_norm": 0.5018686592578888, "loss": 0.4590142875909805, "time": 1.5036786556243897, "epoch": 414, "memory": 35394, "step": 129369}
{"lr": 0.0007882853947218538, "data_time": 0.1879323959350586, "grad_norm": Infinity, "loss": 0.4604321449995041, "time": 1.5828823804855348, "epoch": 414, "memory": 35394, "step": 129469}
{"lr": 0.0007872560753633669, "data_time": 0.002276015281677246, "grad_norm": 0.4712588369846344, "loss": 0.4545822888612747, "time": 1.553980779647827, "epoch": 414, "memory": 35394, "step": 129569}
{"lr": 0.0007860929699724413, "data_time": 0.3337255477905273, "grad_norm": 0.5080683320760727, "loss": 0.46481105089187624, "time": 1.5131177186965943, "epoch": 415, "memory": 35394, "step": 129682}
{"lr": 0.0007850636976124751, "data_time": 0.11972310543060302, "grad_norm": 0.44564695060253146, "loss": 0.46093666553497314, "time": 1.5698202133178711, "epoch": 415, "memory": 35394, "step": 129782}
{"lr": 0.0007840344499848315, "data_time": 0.0036071300506591796, "grad_norm": 0.44139590859413147, "loss": 0.459196063876152, "time": 1.336792778968811, "epoch": 415, "memory": 35394, "step": 129882}
{"lr": 0.0007828714320696028, "data_time": 0.5516732931137085, "grad_norm": 0.48473972678184507, "loss": 0.45674683451652526, "time": 1.4869813442230224, "epoch": 416, "memory": 35394, "step": 129995}
{"lr": 0.0007818422428028927, "data_time": 0.2292172908782959, "grad_norm": 0.5001439362764358, "loss": 0.46912693977355957, "time": 1.5247663259506226, "epoch": 416, "memory": 35394, "step": 130095}
{"lr": 0.0007808130836027614, "data_time": 0.002911496162414551, "grad_norm": 0.5104158312082291, "loss": 0.4596225291490555, "time": 1.5464042663574218, "epoch": 416, "memory": 35394, "step": 130195}
{"lr": 0.000779650172029947, "data_time": 0.4855785608291626, "grad_norm": 0.4239960998296738, "loss": 0.46430186927318573, "time": 1.527513289451599, "epoch": 417, "memory": 35394, "step": 130308}
{"lr": 0.0007786210825522159, "data_time": 0.19787447452545165, "grad_norm": 0.5092773169279099, "loss": 0.4610517591238022, "time": 1.572093939781189, "epoch": 417, "memory": 35394, "step": 130408}
{"lr": 0.0007775920284748307, "data_time": 0.003065752983093262, "grad_norm": 0.4869637370109558, "loss": 0.46202807426452636, "time": 1.4640485525131226, "epoch": 417, "memory": 35394, "step": 130508}
{"lr": 0.0007764292421094249, "data_time": 0.644010353088379, "grad_norm": 0.48015413880348207, "loss": 0.45699204206466676, "time": 1.6199530601501464, "epoch": 418, "memory": 35394, "step": 130621}
{"lr": 0.0007754002691147741, "data_time": 0.37414324283599854, "grad_norm": 0.5139708310365677, "loss": 0.4631273716688156, "time": 1.571000075340271, "epoch": 418, "memory": 35394, "step": 130721}
{"lr": 0.000774371336853664, "data_time": 0.09165587425231933, "grad_norm": 0.49450262188911437, "loss": 0.46429385244846344, "time": 1.5488343238830566, "epoch": 418, "memory": 35394, "step": 130821}
{"lr": 0.0007732086945586276, "data_time": 0.46601414680480957, "grad_norm": 0.4845228046178818, "loss": 0.45622915029525757, "time": 1.574863362312317, "epoch": 419, "memory": 35394, "step": 130934}
{"lr": 0.0007721798547392717, "data_time": 0.31970667839050293, "grad_norm": 0.4161977171897888, "loss": 0.46290566623210905, "time": 1.5593523263931275, "epoch": 419, "memory": 35394, "step": 131034}
{"lr": 0.0007711510609859889, "data_time": 0.03889741897583008, "grad_norm": 0.438026687502861, "loss": 0.4631377786397934, "time": 1.541309118270874, "epoch": 419, "memory": 35394, "step": 131134}
{"lr": 0.0007699885816219449, "data_time": 0.24503612518310547, "grad_norm": 0.4520964503288269, "loss": 0.45535717606544496, "time": 1.380501914024353, "epoch": 420, "memory": 35394, "step": 131247}
{"lr": 0.000768959891667938, "data_time": 0.04092674255371094, "grad_norm": 0.44753319025039673, "loss": 0.46415372788906095, "time": 1.6172717809677124, "epoch": 420, "memory": 35394, "step": 131347}
{"lr": 0.0007679312531117892, "data_time": 0.0031385898590087892, "grad_norm": 0.48241367042064665, "loss": 0.4660127341747284, "time": 1.5278614997863769, "epoch": 420, "memory": 35394, "step": 131447}
{"lr": 0.0007667689555367228, "data_time": 0.5865872621536254, "grad_norm": 0.42883131504058836, "loss": 0.4625865012407303, "time": 1.5198851585388184, "epoch": 421, "memory": 35394, "step": 131560}
{"lr": 0.0007657404321356843, "data_time": 0.267848801612854, "grad_norm": 0.4649441421031952, "loss": 0.4627122819423676, "time": 1.5592204809188843, "epoch": 421, "memory": 35394, "step": 131660}
{"lr": 0.0007647119654634574, "data_time": 0.0022372961044311523, "grad_norm": 0.5159564107656479, "loss": 0.4637784421443939, "time": 1.5601675987243653, "epoch": 421, "memory": 35394, "step": 131760}
{"lr": 0.0007635498685323988, "data_time": 0.6589699506759643, "grad_norm": 0.4649036556482315, "loss": 0.4619953989982605, "time": 1.5878512382507324, "epoch": 422, "memory": 35394, "step": 131873}
{"lr": 0.0007625215283692499, "data_time": 0.3618939876556396, "grad_norm": 0.42270139753818514, "loss": 0.4593507796525955, "time": 1.5332109212875367, "epoch": 422, "memory": 35394, "step": 131973}
{"lr": 0.0007614932502649447, "data_time": 0.09929156303405762, "grad_norm": 0.5235551923513413, "loss": 0.46581989228725434, "time": 1.5474856853485108, "epoch": 422, "memory": 35394, "step": 132073}
{"lr": 0.0007603313728296713, "data_time": 0.5772545099258423, "grad_norm": 0.49471780061721804, "loss": 0.45993287265300753, "time": 1.56036274433136, "epoch": 423, "memory": 35394, "step": 132186}
{"lr": 0.0007593032325863599, "data_time": 0.27472658157348634, "grad_norm": 0.5170948922634124, "loss": 0.46400604844093324, "time": 1.5725914239883423, "epoch": 423, "memory": 35394, "step": 132286}
{"lr": 0.0007582751597309171, "data_time": 0.002477884292602539, "grad_norm": 0.44550026059150694, "loss": 0.45195417702198026, "time": 1.6071324825286866, "epoch": 423, "memory": 35394, "step": 132386}
{"lr": 0.000757113520639647, "data_time": 0.015038228034973145, "grad_norm": 0.5534243792295456, "loss": 0.4556361049413681, "time": 1.6417625665664672, "epoch": 424, "memory": 35394, "step": 132499}
{"lr": 0.0007560855969948773, "data_time": 0.002623248100280762, "grad_norm": 0.4918697327375412, "loss": 0.4609454423189163, "time": 1.382585382461548, "epoch": 424, "memory": 35394, "step": 132599}
{"lr": 0.0007550577460659076, "data_time": 0.0027357101440429687, "grad_norm": 0.49318069219589233, "loss": 0.4616269528865814, "time": 1.6130537033081054, "epoch": 424, "memory": 35394, "step": 132699}
{"lr": 0.0007538963641629906, "data_time": 0.590048885345459, "grad_norm": 0.4525815099477768, "loss": 0.4594533383846283, "time": 1.6848021745681763, "epoch": 425, "memory": 35394, "step": 132812}
{"lr": 0.0007528686737919537, "data_time": 0.07929732799530029, "grad_norm": 0.4322767645120621, "loss": 0.46446330547332765, "time": 1.5878856420516967, "epoch": 425, "memory": 35394, "step": 132912}
{"lr": 0.0007518410614634692, "data_time": 0.0024321794509887694, "grad_norm": 0.5067391693592072, "loss": 0.4629304051399231, "time": 1.6257409811019898, "epoch": 425, "memory": 35394, "step": 133012}
{"lr": 0.0007506799555890836, "data_time": 0.4988080978393555, "grad_norm": 0.465751713514328, "loss": 0.4638523429632187, "time": 1.5196877002716065, "epoch": 426, "memory": 35394, "step": 133125}
{"lr": 0.0007496525151631844, "data_time": 0.20890722274780274, "grad_norm": 0.5026269018650055, "loss": 0.4612779080867767, "time": 1.507576060295105, "epoch": 426, "memory": 35394, "step": 133225}
{"lr": 0.0007486251581053248, "data_time": 0.0026610851287841796, "grad_norm": 0.5124444544315339, "loss": 0.45965120792388914, "time": 1.5258639335632325, "epoch": 426, "memory": 35394, "step": 133325}
{"lr": 0.0007474643470951704, "data_time": 0.016214990615844728, "grad_norm": 0.5256038904190063, "loss": 0.4603685051202774, "time": 1.4831087350845338, "epoch": 427, "memory": 35394, "step": 133438}
{"lr": 0.0007464371732817629, "data_time": 0.002395915985107422, "grad_norm": 0.4776729941368103, "loss": 0.46193221509456633, "time": 1.5750010013580322, "epoch": 427, "memory": 35394, "step": 133538}
{"lr": 0.0007454100881605282, "data_time": 0.0029905557632446287, "grad_norm": 0.49713248908519747, "loss": 0.45799975097179413, "time": 1.5294370889663695, "epoch": 427, "memory": 35394, "step": 133638}
{"lr": 0.0007442495908455227, "data_time": 0.21846406459808348, "grad_norm": 0.48414393067359923, "loss": 0.46079557836055757, "time": 1.4661475658416747, "epoch": 428, "memory": 35394, "step": 133751}
{"lr": 0.000743222700307632, "data_time": 0.1162604808807373, "grad_norm": 0.49118084609508517, "loss": 0.4601077973842621, "time": 1.5366430282592773, "epoch": 428, "memory": 35394, "step": 133851}
{"lr": 0.0007421959037846092, "data_time": 0.003058910369873047, "grad_norm": 0.4448954939842224, "loss": 0.457641014456749, "time": 1.527402901649475, "epoch": 428, "memory": 35394, "step": 133951}
{"lr": 0.0007410357389905799, "data_time": 0.48117704391479493, "grad_norm": 0.4472320079803467, "loss": 0.4571744501590729, "time": 1.5707884550094604, "epoch": 429, "memory": 35394, "step": 134064}
{"lr": 0.0007400091483866383, "data_time": 0.20175619125366212, "grad_norm": 0.5194329112768173, "loss": 0.45459108948707583, "time": 1.533433198928833, "epoch": 429, "memory": 35394, "step": 134164}
{"lr": 0.0007389826571187344, "data_time": 0.0027493000030517577, "grad_norm": 0.42080663442611693, "loss": 0.46646563410758973, "time": 1.5409100770950317, "epoch": 429, "memory": 35394, "step": 134264}
{"lr": 0.000737822843666117, "data_time": 0.4755248546600342, "grad_norm": 0.4923559814691544, "loss": 0.46265061795711515, "time": 1.546687126159668, "epoch": 430, "memory": 35394, "step": 134377}
{"lr": 0.0007367965696496907, "data_time": 0.24656922817230226, "grad_norm": 0.5003244310617447, "loss": 0.46288955509662627, "time": 1.5205992698669433, "epoch": 430, "memory": 35394, "step": 134477}
{"lr": 0.0007357704002888601, "data_time": 0.002930879592895508, "grad_norm": 0.4402347207069397, "loss": 0.4576769798994064, "time": 1.4837854623794555, "epoch": 430, "memory": 35394, "step": 134577}
{"lr": 0.0007346109569923877, "data_time": 0.5439001321792603, "grad_norm": 0.48950086534023285, "loss": 0.46496088802814484, "time": 1.5497824192047118, "epoch": 431, "memory": 35394, "step": 134690}
{"lr": 0.0007335850162119054, "data_time": 0.27193412780761717, "grad_norm": 0.48554982542991637, "loss": 0.4587989777326584, "time": 1.569877529144287, "epoch": 431, "memory": 35394, "step": 134790}
{"lr": 0.00073255918540488, "data_time": 0.03468668460845947, "grad_norm": 0.4874837815761566, "loss": 0.4604088574647903, "time": 1.5794793605804442, "epoch": 431, "memory": 35394, "step": 134890}
{"lr": 0.000731400131073283, "data_time": 0.5541062593460083, "grad_norm": 0.49675900042057036, "loss": 0.46056089401245115, "time": 1.5412570238113403, "epoch": 432, "memory": 35394, "step": 135003}
{"lr": 0.0007303745401717703, "data_time": 0.3957386493682861, "grad_norm": 0.4345190614461899, "loss": 0.4590401291847229, "time": 1.5102569818496705, "epoch": 432, "memory": 35394, "step": 135103}
{"lr": 0.0007293490645597912, "data_time": 0.04731769561767578, "grad_norm": 0.4910277396440506, "loss": 0.4653948873281479, "time": 1.7797212839126586, "epoch": 432, "memory": 35394, "step": 135203}
{"lr": 0.0007281904179954899, "data_time": 0.37452795505523684, "grad_norm": 0.45429456532001494, "loss": 0.4567677974700928, "time": 1.486371397972107, "epoch": 433, "memory": 35394, "step": 135316}
{"lr": 0.000727165193610294, "data_time": 0.015356183052062988, "grad_norm": 0.48789272010326384, "loss": 0.45977153182029723, "time": 1.4717490673065186, "epoch": 433, "memory": 35394, "step": 135416}
{"lr": 0.0007261400898288389, "data_time": 0.1029120683670044, "grad_norm": 0.479408985376358, "loss": 0.4664169579744339, "time": 1.696608829498291, "epoch": 433, "memory": 35394, "step": 135516}
{"lr": 0.0007249818698276376, "data_time": 0.5687220811843872, "grad_norm": 0.4548847824335098, "loss": 0.4615360975265503, "time": 1.518554162979126, "epoch": 434, "memory": 35394, "step": 135629}
{"lr": 0.000723957028590163, "data_time": 0.21081502437591554, "grad_norm": 0.5040798932313919, "loss": 0.4568161189556122, "time": 1.5283570766448975, "epoch": 434, "memory": 35394, "step": 135729}
{"lr": 0.0007229323132686778, "data_time": 0.0028081655502319334, "grad_norm": 0.48523223400115967, "loss": 0.4578641206026077, "time": 1.5504504203796388, "epoch": 434, "memory": 35394, "step": 135829}
{"lr": 0.0007217745386194621, "data_time": 0.30751302242279055, "grad_norm": 0.48581899106502535, "loss": 0.4621519833803177, "time": 1.4180559873580934, "epoch": 435, "memory": 35394, "step": 135942}
{"lr": 0.000720750097154894, "data_time": 0.16794238090515137, "grad_norm": 0.510479924082756, "loss": 0.4624408662319183, "time": 1.5129233360290528, "epoch": 435, "memory": 35394, "step": 136042}
{"lr": 0.0007197257869165232, "data_time": 0.0021323204040527345, "grad_norm": 0.49409317374229433, "loss": 0.46332195699214934, "time": 1.6087597370147706, "epoch": 435, "memory": 35394, "step": 136142}
{"lr": 0.0007185684764009542, "data_time": 0.5547876834869385, "grad_norm": 0.5408308058977127, "loss": 0.4652552008628845, "time": 1.504785704612732, "epoch": 436, "memory": 35394, "step": 136255}
{"lr": 0.0007175444513279959, "data_time": 0.3725828886032104, "grad_norm": 0.5089448601007461, "loss": 0.4509001851081848, "time": 1.5308786630630493, "epoch": 436, "memory": 35394, "step": 136355}
{"lr": 0.0007165205627893128, "data_time": 0.02811720371246338, "grad_norm": 0.4538776636123657, "loss": 0.45660494565963744, "time": 1.5390478134155274, "epoch": 436, "memory": 35394, "step": 136455}
{"lr": 0.0007153637351815202, "data_time": 0.5698870420455933, "grad_norm": 0.4930434197187424, "loss": 0.46539574563503266, "time": 1.5221275329589843, "epoch": 437, "memory": 35394, "step": 136568}
{"lr": 0.0007143401431121179, "data_time": 0.23919663429260254, "grad_norm": 0.42585051357746123, "loss": 0.4628249555826187, "time": 1.5122553348541259, "epoch": 437, "memory": 35394, "step": 136668}
{"lr": 0.0007133166928828556, "data_time": 0.002228999137878418, "grad_norm": 0.5076110392808915, "loss": 0.46013620793819426, "time": 1.48830406665802, "epoch": 437, "memory": 35394, "step": 136768}
{"lr": 0.0007121603669491391, "data_time": 0.446143651008606, "grad_norm": 0.5267078578472137, "loss": 0.4592681646347046, "time": 1.566131043434143, "epoch": 438, "memory": 35394, "step": 136881}
{"lr": 0.000711137224488218, "data_time": 0.2765967845916748, "grad_norm": 0.5165354371070862, "loss": 0.46104643046855925, "time": 1.578632402420044, "epoch": 438, "memory": 35394, "step": 136981}
{"lr": 0.000710114229170998, "data_time": 0.07118067741394044, "grad_norm": 0.4689114332199097, "loss": 0.4640138536691666, "time": 1.524516797065735, "epoch": 438, "memory": 35394, "step": 137081}
{"lr": 0.0007089584236695155, "data_time": 0.47111237049102783, "grad_norm": 0.5285608798265458, "loss": 0.4606296747922897, "time": 1.5354026794433593, "epoch": 439, "memory": 35394, "step": 137194}
{"lr": 0.0007079357474147024, "data_time": 0.10822665691375732, "grad_norm": 0.4700768679380417, "loss": 0.4612482666969299, "time": 1.5548714637756347, "epoch": 439, "memory": 35394, "step": 137294}
{"lr": 0.000706913223604767, "data_time": 0.003165531158447266, "grad_norm": 0.44926022589206693, "loss": 0.4590598553419113, "time": 1.5504784822463988, "epoch": 439, "memory": 35394, "step": 137394}
{"lr": 0.000705757957285235, "data_time": 0.5249054193496704, "grad_norm": 0.4799146384000778, "loss": 0.4664245307445526, "time": 1.5615450382232665, "epoch": 440, "memory": 35394, "step": 137507}
{"lr": 0.0007047357638265974, "data_time": 0.23668148517608642, "grad_norm": 0.48511249423027036, "loss": 0.45753124058246614, "time": 1.5571000576019287, "epoch": 440, "memory": 35394, "step": 137607}
{"lr": 0.0007037137281115406, "data_time": 0.07716403007507325, "grad_norm": 0.5155068576335907, "loss": 0.46699685156345366, "time": 1.6099170446395874, "epoch": 440, "memory": 35394, "step": 137707}
{"lr": 0.00070255901971493, "data_time": 0.5776152610778809, "grad_norm": 0.45021572709083557, "loss": 0.46249938011169434, "time": 1.524083948135376, "epoch": 441, "memory": 35394, "step": 137820}
{"lr": 0.0007015373256347016, "data_time": 0.2776158094406128, "grad_norm": 0.46499556601047515, "loss": 0.46655089855194093, "time": 1.561160111427307, "epoch": 441, "memory": 35394, "step": 137920}
{"lr": 0.0007005157945941984, "data_time": 0.002899765968322754, "grad_norm": 0.4717925012111664, "loss": 0.468775275349617, "time": 1.3300047874450684, "epoch": 441, "memory": 35394, "step": 138020}
{"lr": 0.0006993616628524283, "data_time": 0.4530501365661621, "grad_norm": 0.5204205274581909, "loss": 0.47357961535453796, "time": 1.5045105457305907, "epoch": 442, "memory": 35394, "step": 138133}
{"lr": 0.0006983404847247443, "data_time": 1.7855005025863648, "grad_norm": 0.5262113749980927, "loss": 0.4550956904888153, "time": 3.637418293952942, "epoch": 442, "memory": 35394, "step": 138233}
{"lr": 0.0006973194749302834, "data_time": 1.6485404968261719, "grad_norm": 0.544659873843193, "loss": 0.4585131585597992, "time": 4.4813954591751095, "epoch": 442, "memory": 35394, "step": 138333}
{"lr": 0.0006961659385659182, "data_time": 1.8574756860733033, "grad_norm": 0.5009800374507904, "loss": 0.4684096336364746, "time": 2.9571255922317503, "epoch": 443, "memory": 35394, "step": 138446}
{"lr": 0.00069514529295654, "data_time": 1.696171760559082, "grad_norm": 0.5520819127559662, "loss": 0.46638205647468567, "time": 3.2953576326370237, "epoch": 443, "memory": 35394, "step": 138546}
{"lr": 0.0006941248209711529, "data_time": 0.9135033369064331, "grad_norm": 0.4756655961275101, "loss": 0.4629565834999084, "time": 2.7950507164001466, "epoch": 443, "memory": 35394, "step": 138646}
{"lr": 0.0006929718986970981, "data_time": 1.3150236845016479, "grad_norm": 0.5408902078866958, "loss": 0.46794935762882234, "time": 2.6481194496154785, "epoch": 444, "memory": 35394, "step": 138759}
{"lr": 0.0006919518021631523, "data_time": 0.7795651912689209, "grad_norm": 0.49472035765647887, "loss": 0.4687398046255112, "time": 2.760712170600891, "epoch": 444, "memory": 35394, "step": 138859}
{"lr": 0.000690931884541149, "data_time": 0.38036222457885743, "grad_norm": 0.5367949336767197, "loss": 0.45965767502784727, "time": 2.6549333572387694, "epoch": 444, "memory": 35394, "step": 138959}
{"lr": 0.00068977959506035, "data_time": 0.9335639715194702, "grad_norm": 0.43983646035194396, "loss": 0.46405328512191774, "time": 1.9929667949676513, "epoch": 445, "memory": 35394, "step": 139072}
{"lr": 0.0006887600641500528, "data_time": 0.3692065477371216, "grad_norm": 0.5099006950855255, "loss": 0.46115460693836213, "time": 1.593319582939148, "epoch": 445, "memory": 35394, "step": 139172}
{"lr": 0.0006877407174367488, "data_time": 0.1051638126373291, "grad_norm": 0.4914274215698242, "loss": 0.4600334346294403, "time": 1.580415630340576, "epoch": 445, "memory": 35394, "step": 139272}
{"lr": 0.0006865890794418842, "data_time": 0.5491058111190796, "grad_norm": 0.47024130523204805, "loss": 0.4676549106836319, "time": 1.5252463579177857, "epoch": 446, "memory": 35394, "step": 139385}
{"lr": 0.0006855701306942794, "data_time": 0.3410999298095703, "grad_norm": 0.5584923088550567, "loss": 0.45799137055873873, "time": 1.5346842050552367, "epoch": 446, "memory": 35394, "step": 139485}
{"lr": 0.0006845513714257275, "data_time": 0.02210075855255127, "grad_norm": 0.5030001878738404, "loss": 0.4578752934932709, "time": 1.6604676961898803, "epoch": 446, "memory": 35394, "step": 139585}
{"lr": 0.0006834004035989081, "data_time": 0.5798029661178589, "grad_norm": 0.5350233882665634, "loss": 0.4671083092689514, "time": 1.548690629005432, "epoch": 447, "memory": 35394, "step": 139698}
{"lr": 0.0006823820535435925, "data_time": 0.4362912893295288, "grad_norm": 0.512104070186615, "loss": 0.46072037518024445, "time": 1.5616682767868042, "epoch": 447, "memory": 35394, "step": 139798}
{"lr": 0.0006813638982463174, "data_time": 0.06338682174682617, "grad_norm": 0.49605554044246675, "loss": 0.4579521298408508, "time": 1.579754853248596, "epoch": 447, "memory": 35394, "step": 139898}
{"lr": 0.0006802136192587822, "data_time": 0.3943720579147339, "grad_norm": 0.5372336477041244, "loss": 0.45988790690898895, "time": 1.4568660974502563, "epoch": 448, "memory": 35394, "step": 140011}
{"lr": 0.0006791958844156413, "data_time": 0.019506168365478516, "grad_norm": 0.49817411601543427, "loss": 0.4626215219497681, "time": 1.518184733390808, "epoch": 448, "memory": 35394, "step": 140111}
{"lr": 0.0006781783496063693, "data_time": 0.002790498733520508, "grad_norm": 0.5248063385486603, "loss": 0.4535695970058441, "time": 1.6034734725952149, "epoch": 448, "memory": 35394, "step": 140211}
{"lr": 0.0006770287781181835, "data_time": 0.5188650369644165, "grad_norm": 0.5238263040781022, "loss": 0.45553546249866483, "time": 1.5289092779159545, "epoch": 449, "memory": 35394, "step": 140324}
{"lr": 0.0006760116749971234, "data_time": 0.27566649913787844, "grad_norm": 0.4659523397684097, "loss": 0.4564900457859039, "time": 1.6206937789916993, "epoch": 449, "memory": 35394, "step": 140424}
{"lr": 0.0006749947771825162, "data_time": 0.0022972583770751952, "grad_norm": 0.5144504278898239, "loss": 0.4651366651058197, "time": 1.529384183883667, "epoch": 449, "memory": 35394, "step": 140524}
{"lr": 0.0006738459318422655, "data_time": 0.4573718309402466, "grad_norm": 0.5892918080091476, "loss": 0.46062980890274047, "time": 1.5704143047332764, "epoch": 450, "memory": 35394, "step": 140637}
{"lr": 0.0006728294769429445, "data_time": 0.15295979976654053, "grad_norm": 0.4917945206165314, "loss": 0.45592640042304994, "time": 1.5026135683059691, "epoch": 450, "memory": 35394, "step": 140737}
{"lr": 0.0006718132326193278, "data_time": 0.0028190135955810545, "grad_norm": 0.49790985882282257, "loss": 0.45856693983078, "time": 1.5214699268341065, "epoch": 450, "memory": 35394, "step": 140837}
{"lr": 0.0006706651320638207, "data_time": 0.5017338514328002, "grad_norm": 0.49816806316375734, "loss": 0.4630915731191635, "time": 1.5529969453811645, "epoch": 451, "memory": 35394, "step": 140950}
{"lr": 0.0006696493418753823, "data_time": 0.2868317127227783, "grad_norm": 0.5589890331029892, "loss": 0.4650901764631271, "time": 1.5298490285873414, "epoch": 451, "memory": 35394, "step": 141050}
{"lr": 0.0006686337675284839, "data_time": 0.003199124336242676, "grad_norm": 0.48651245832443235, "loss": 0.4626960426568985, "time": 1.5420993328094483, "epoch": 451, "memory": 35394, "step": 141150}
{"lr": 0.0006674864303824463, "data_time": 0.468861722946167, "grad_norm": 0.5186296999454498, "loss": 0.45915973782539365, "time": 1.5756585121154785, "epoch": 452, "memory": 35394, "step": 141263}
{"lr": 0.0006664713213832471, "data_time": 0.2626155614852905, "grad_norm": 0.45512165129184723, "loss": 0.46134079396724703, "time": 1.606174921989441, "epoch": 452, "memory": 35394, "step": 141363}
{"lr": 0.0006654564334879241, "data_time": 0.0804297685623169, "grad_norm": 0.48622157871723176, "loss": 0.45969220995903015, "time": 1.6626015424728393, "epoch": 452, "memory": 35394, "step": 141463}
{"lr": 0.0006643098783636961, "data_time": 0.589504885673523, "grad_norm": 0.5139493256807327, "loss": 0.4629585027694702, "time": 1.532458209991455, "epoch": 453, "memory": 35394, "step": 141576}
{"lr": 0.000663295467021047, "data_time": 0.21455390453338624, "grad_norm": 0.4950279802083969, "loss": 0.4673895925283432, "time": 1.5278385400772094, "epoch": 453, "memory": 35394, "step": 141676}
{"lr": 0.0006622812820410227, "data_time": 0.002960348129272461, "grad_norm": 0.4857962399721146, "loss": 0.46295020580291746, "time": 1.52605402469635, "epoch": 453, "memory": 35394, "step": 141776}
{"lr": 0.0006611355275382609, "data_time": 0.60075843334198, "grad_norm": 0.47318628132343293, "loss": 0.4664470613002777, "time": 1.5325217723846436, "epoch": 454, "memory": 35394, "step": 141889}
{"lr": 0.0006601218303081519, "data_time": 0.3950622320175171, "grad_norm": 0.5327951908111572, "loss": 0.45818697810173037, "time": 1.6213361978530885, "epoch": 454, "memory": 35394, "step": 141989}
{"lr": 0.000659108364695744, "data_time": 0.07942290306091308, "grad_norm": 0.4706589043140411, "loss": 0.45924786329269407, "time": 1.5014161825180055, "epoch": 454, "memory": 35394, "step": 142089}
{"lr": 0.000657963429401114, "data_time": 0.38557460308074953, "grad_norm": 0.47424596548080444, "loss": 0.46393424570560454, "time": 1.5509785890579224, "epoch": 455, "memory": 35394, "step": 142202}
{"lr": 0.0006569504627279527, "data_time": 0.3150128126144409, "grad_norm": 0.4935568690299988, "loss": 0.4562062680721283, "time": 1.5179010391235352, "epoch": 455, "memory": 35394, "step": 142302}
{"lr": 0.0006559377329238109, "data_time": 0.030915546417236327, "grad_norm": 0.5083304315805435, "loss": 0.46436485946178435, "time": 1.5735672235488891, "epoch": 455, "memory": 35394, "step": 142402}
{"lr": 0.0006547936354106905, "data_time": 0.5987199544906616, "grad_norm": 0.48005650639534, "loss": 0.46489902436733244, "time": 1.5350406646728516, "epoch": 456, "memory": 35394, "step": 142515}
{"lr": 0.0006537814157270331, "data_time": 0.2565108299255371, "grad_norm": 0.45189673602581026, "loss": 0.45762014389038086, "time": 1.5562376260757447, "epoch": 456, "memory": 35394, "step": 142615}
{"lr": 0.0006527694381598715, "data_time": 0.0027908563613891603, "grad_norm": 0.5022085934877396, "loss": 0.46226227283477783, "time": 1.5106909036636353, "epoch": 456, "memory": 35394, "step": 142715}
{"lr": 0.0006516261969880502, "data_time": 0.6276716947555542, "grad_norm": 0.5394176840782166, "loss": 0.4576847404241562, "time": 1.5481203317642211, "epoch": 457, "memory": 35394, "step": 142828}
{"lr": 0.0006506147407143333, "data_time": 0.31686785221099856, "grad_norm": 0.5010326743125916, "loss": 0.46156075596809387, "time": 1.5045914888381957, "epoch": 457, "memory": 35394, "step": 142928}
{"lr": 0.0006496035318006605, "data_time": 0.0037679195404052733, "grad_norm": 0.5401106417179108, "loss": 0.4604708433151245, "time": 1.6026639223098755, "epoch": 457, "memory": 35394, "step": 143028}
{"lr": 0.0006484611655160315, "data_time": 0.4906779289245605, "grad_norm": 0.514948058128357, "loss": 0.46124990582466124, "time": 1.5342911958694458, "epoch": 458, "memory": 35394, "step": 143141}
{"lr": 0.0006474504890603087, "data_time": 0.25241928100585936, "grad_norm": 0.5604171365499496, "loss": 0.4657848685979843, "time": 1.4864326477050782, "epoch": 458, "memory": 35394, "step": 143241}
{"lr": 0.0006464400652041669, "data_time": 0.004182338714599609, "grad_norm": 0.5200603038072587, "loss": 0.4623137265443802, "time": 1.7391314029693603, "epoch": 458, "memory": 35394, "step": 143341}
{"lr": 0.0006452985923384348, "data_time": 0.42098050117492675, "grad_norm": 0.5095210820436478, "loss": 0.45698720812797544, "time": 1.4783989906311035, "epoch": 459, "memory": 35394, "step": 143454}
{"lr": 0.0006442887120961099, "data_time": 0.15229637622833253, "grad_norm": 0.49671626687049864, "loss": 0.4570581316947937, "time": 1.5282291889190673, "epoch": 459, "memory": 35394, "step": 143554}
{"lr": 0.0006432790896888041, "data_time": 0.0029079675674438476, "grad_norm": 0.5482138901948929, "loss": 0.4672504663467407, "time": 1.5665754556655884, "epoch": 459, "memory": 35394, "step": 143654}
{"lr": 0.0006421385287591772, "data_time": 0.5480634927749634, "grad_norm": 0.4861539304256439, "loss": 0.4573762625455856, "time": 1.489629292488098, "epoch": 460, "memory": 35394, "step": 143767}
{"lr": 0.0006411294611127379, "data_time": 0.5079626560211181, "grad_norm": 0.5076023638248444, "loss": 0.46155781447887423, "time": 1.5197014570236207, "epoch": 460, "memory": 35394, "step": 143867}
{"lr": 0.0006401206565325721, "data_time": 0.010690593719482422, "grad_norm": 0.5245372176170349, "loss": 0.4669757753610611, "time": 1.763887143135071, "epoch": 460, "memory": 35394, "step": 143967}
{"lr": 0.0006389810260414682, "data_time": 0.4129917621612549, "grad_norm": 0.512938991189003, "loss": 0.4598793089389801, "time": 1.5280206441879272, "epoch": 461, "memory": 35394, "step": 144080}
{"lr": 0.000637972787360218, "data_time": 0.010643696784973145, "grad_norm": 0.5596128463745117, "loss": 0.4554563671350479, "time": 1.5508057355880738, "epoch": 461, "memory": 35394, "step": 144180}
{"lr": 0.0006369648169722293, "data_time": 0.0032871484756469725, "grad_norm": 0.5222501635551453, "loss": 0.4616184741258621, "time": 1.5766475915908813, "epoch": 461, "memory": 35394, "step": 144280}
{"lr": 0.0006358261354069693, "data_time": 0.4028985261917114, "grad_norm": 0.5291092723608017, "loss": 0.45963976681232455, "time": 1.5322589635849, "epoch": 462, "memory": 35394, "step": 144393}
{"lr": 0.0006348187420467654, "data_time": 0.22075996398925782, "grad_norm": 0.5500981658697128, "loss": 0.4608212947845459, "time": 1.6497743606567383, "epoch": 462, "memory": 35394, "step": 144493}
{"lr": 0.000633811622202459, "data_time": 0.0028829097747802733, "grad_norm": 0.5169151812791825, "loss": 0.46394682228565215, "time": 1.5558345079421998, "epoch": 462, "memory": 35394, "step": 144593}
{"lr": 0.0006326739080349722, "data_time": 0.42546329498291013, "grad_norm": 0.48118036389350893, "loss": 0.461764320731163, "time": 1.530112338066101, "epoch": 463, "memory": 35394, "step": 144706}
{"lr": 0.0006316673763379586, "data_time": 0.09503655433654785, "grad_norm": 0.5101545810699463, "loss": 0.4601598560810089, "time": 1.300179362297058, "epoch": 463, "memory": 35394, "step": 144806}
{"lr": 0.0006306611233750422, "data_time": 0.002952098846435547, "grad_norm": 0.482137867808342, "loss": 0.466073802113533, "time": 1.5871543407440185, "epoch": 463, "memory": 35394, "step": 144906}
{"lr": 0.0006295243950615641, "data_time": 0.5292709112167359, "grad_norm": 0.5477166205644608, "loss": 0.4632877707481384, "time": 1.5137897253036499, "epoch": 464, "memory": 35394, "step": 145019}
{"lr": 0.0006285187413559086, "data_time": 0.3652881383895874, "grad_norm": 0.48518839478492737, "loss": 0.4640524536371231, "time": 1.5476929903030396, "epoch": 464, "memory": 35394, "step": 145119}
{"lr": 0.0006275133715980253, "data_time": 0.003035569190979004, "grad_norm": 0.5112075984477997, "loss": 0.4634571969509125, "time": 1.5172111749649049, "epoch": 464, "memory": 35394, "step": 145219}
{"lr": 0.0006263776475787982, "data_time": 0.353069543838501, "grad_norm": 0.5453048199415207, "loss": 0.459948742389679, "time": 1.5750134468078614, "epoch": 465, "memory": 35394, "step": 145332}
{"lr": 0.0006253728881784216, "data_time": 0.027230429649353027, "grad_norm": 0.5235420525074005, "loss": 0.4564871430397034, "time": 1.4954256772994996, "epoch": 465, "memory": 35394, "step": 145432}
{"lr": 0.0006243684179348883, "data_time": 0.002930474281311035, "grad_norm": 0.5387842714786529, "loss": 0.46413208842277526, "time": 1.4877986431121826, "epoch": 465, "memory": 35394, "step": 145532}
{"lr": 0.000623233716633863, "data_time": 0.3651821851730347, "grad_norm": 0.5020798325538636, "loss": 0.4603706955909729, "time": 1.5149360656738282, "epoch": 466, "memory": 35394, "step": 145645}
{"lr": 0.0006222298678381814, "data_time": 0.23046977519989015, "grad_norm": 0.5374514937400818, "loss": 0.46361834108829497, "time": 1.5310673713684082, "epoch": 466, "memory": 35394, "step": 145745}
{"lr": 0.0006212263134037237, "data_time": 0.003547334671020508, "grad_norm": 0.5291926175355911, "loss": 0.46474379301071167, "time": 1.5286866664886474, "epoch": 466, "memory": 35394, "step": 145845}
{"lr": 0.0006200926532282594, "data_time": 0.5670375108718873, "grad_norm": 0.5310541063547134, "loss": 0.457297095656395, "time": 1.536671757698059, "epoch": 467, "memory": 35394, "step": 145958}
{"lr": 0.0006190897313219172, "data_time": 0.05202550888061523, "grad_norm": 0.5541076213121414, "loss": 0.45716977417469024, "time": 1.5343674659729003, "epoch": 467, "memory": 35394, "step": 146058}
{"lr": 0.0006180871089764026, "data_time": 0.002718615531921387, "grad_norm": 0.4940716356039047, "loss": 0.4504273056983948, "time": 1.3722693920135498, "epoch": 467, "memory": 35394, "step": 146158}
{"lr": 0.0006169545083169721, "data_time": 0.3927870988845825, "grad_norm": 0.5825093537569046, "loss": 0.4590290725231171, "time": 1.5524224519729615, "epoch": 468, "memory": 35394, "step": 146271}
{"lr": 0.0006159525295695754, "data_time": 0.012783288955688477, "grad_norm": 0.5075765997171402, "loss": 0.46468735337257383, "time": 1.5303179502487183, "epoch": 468, "memory": 35394, "step": 146371}
{"lr": 0.0006149508555777534, "data_time": 0.002582740783691406, "grad_norm": 0.5976453274488449, "loss": 0.4611683517694473, "time": 1.4913315296173095, "epoch": 468, "memory": 35394, "step": 146471}
{"lr": 0.0006138193328076367, "data_time": 0.4825137615203857, "grad_norm": 0.4839667022228241, "loss": 0.45958058834075927, "time": 1.5033769845962524, "epoch": 469, "memory": 35394, "step": 146584}
{"lr": 0.0006128183134734934, "data_time": 0.1456296920776367, "grad_norm": 0.5696675628423691, "loss": 0.45431122481822966, "time": 1.5128561973571777, "epoch": 469, "memory": 35394, "step": 146684}
{"lr": 0.0006118176040847261, "data_time": 0.002831864356994629, "grad_norm": 0.4731150597333908, "loss": 0.463539919257164, "time": 1.533247470855713, "epoch": 469, "memory": 35394, "step": 146784}
{"lr": 0.0006106871775597224, "data_time": 0.4967496871948242, "grad_norm": 0.6444210410118103, "loss": 0.46188818514347074, "time": 1.6030086994171142, "epoch": 470, "memory": 35394, "step": 146897}
{"lr": 0.0006096871338775755, "data_time": 0.2611356496810913, "grad_norm": 0.5409961909055709, "loss": 0.4577223628759384, "time": 1.544038224220276, "epoch": 470, "memory": 35394, "step": 146997}
{"lr": 0.0006086874053255775, "data_time": 0.0027668237686157226, "grad_norm": 0.5315483987331391, "loss": 0.4573668628931046, "time": 1.4954880237579347, "epoch": 470, "memory": 35394, "step": 147097}
{"lr": 0.0006075580933837002, "data_time": 0.4508376121520996, "grad_norm": 0.5485885441303253, "loss": 0.4649648189544678, "time": 1.5068805932998657, "epoch": 471, "memory": 35394, "step": 147210}
{"lr": 0.0006065590415764659, "data_time": 0.33312110900878905, "grad_norm": 0.5152737408876419, "loss": 0.46069738268852234, "time": 1.560388970375061, "epoch": 471, "memory": 35394, "step": 147310}
{"lr": 0.0006055603100790425, "data_time": 0.0029439210891723635, "grad_norm": 0.5017693936824799, "loss": 0.46056628227233887, "time": 1.517511248588562, "epoch": 471, "memory": 35394, "step": 147410}
{"lr": 0.0006044321310402245, "data_time": 0.2689925193786621, "grad_norm": 0.5082014709711075, "loss": 0.46844695806503295, "time": 1.3333730697631836, "epoch": 472, "memory": 35394, "step": 147523}
{"lr": 0.0006034340873147289, "data_time": 0.33131563663482666, "grad_norm": 0.52098308801651, "loss": 0.45949685871601104, "time": 1.499395751953125, "epoch": 472, "memory": 35394, "step": 147623}
{"lr": 0.000602436369073509, "data_time": 0.20706779956817628, "grad_norm": 0.5498181581497192, "loss": 0.46618711948394775, "time": 1.5186823606491089, "epoch": 472, "memory": 35394, "step": 147723}
{"lr": 0.0006013093412393038, "data_time": 0.3774076461791992, "grad_norm": 0.5414760500192642, "loss": 0.463500851392746, "time": 1.5398490667343139, "epoch": 473, "memory": 35394, "step": 147836}
{"lr": 0.0006003123217860197, "data_time": 0.07094728946685791, "grad_norm": 0.548964136838913, "loss": 0.4616803079843521, "time": 1.519063115119934, "epoch": 473, "memory": 35394, "step": 147936}
{"lr": 0.000599315632986196, "data_time": 0.0029817819595336914, "grad_norm": 0.5245636582374573, "loss": 0.4619672507047653, "time": 1.5298002719879151, "epoch": 473, "memory": 35394, "step": 148036}
{"lr": 0.0005981897746394843, "data_time": 0.5922524452209472, "grad_norm": 0.5623079776763916, "loss": 0.45342636704444883, "time": 1.5207454442977906, "epoch": 474, "memory": 35394, "step": 148149}
{"lr": 0.0005971937956322674, "data_time": 0.5806650638580322, "grad_norm": 0.5260100156068802, "loss": 0.4554383635520935, "time": 1.5402605056762695, "epoch": 474, "memory": 35394, "step": 148249}
{"lr": 0.0005961981524423327, "data_time": 0.07380490303039551, "grad_norm": 0.575971606373787, "loss": 0.4637920022010803, "time": 1.5265559434890748, "epoch": 474, "memory": 35394, "step": 148349}
{"lr": 0.000595073481847021, "data_time": 0.5819040298461914, "grad_norm": 0.4755389243364334, "loss": 0.4588101178407669, "time": 1.522194743156433, "epoch": 475, "memory": 35394, "step": 148462}
{"lr": 0.000594078559442849, "data_time": 0.16683032512664794, "grad_norm": 0.480205574631691, "loss": 0.46166918575763705, "time": 1.50151846408844, "epoch": 475, "memory": 35394, "step": 148562}
{"lr": 0.000593083978014333, "data_time": 0.0946965217590332, "grad_norm": 0.5593460589647293, "loss": 0.4607580125331879, "time": 1.5146509885787964, "epoch": 475, "memory": 35394, "step": 148662}
{"lr": 0.0005919605134150618, "data_time": 0.5207446336746215, "grad_norm": 0.5103044211864471, "loss": 0.46355157494544985, "time": 1.5399246215820312, "epoch": 476, "memory": 35394, "step": 148775}
{"lr": 0.0005909666637537736, "data_time": 0.21189112663269044, "grad_norm": 0.49471838772296906, "loss": 0.4615912318229675, "time": 1.4893072128295899, "epoch": 476, "memory": 35394, "step": 148875}
{"lr": 0.0005899731602209829, "data_time": 0.0023699283599853517, "grad_norm": 0.5212451010942459, "loss": 0.46054800152778624, "time": 1.5229119062423706, "epoch": 476, "memory": 35394, "step": 148975}
{"lr": 0.0005888509198428282, "data_time": 0.49967501163482664, "grad_norm": 0.5079700410366058, "loss": 0.46640723049640653, "time": 1.5237704277038575, "epoch": 477, "memory": 35394, "step": 149088}
{"lr": 0.0005878581590468589, "data_time": 0.21410205364227294, "grad_norm": 0.5276788592338562, "loss": 0.45435474514961244, "time": 1.4773242712020873, "epoch": 477, "memory": 35394, "step": 149188}
{"lr": 0.0005868657495266143, "data_time": 0.003293132781982422, "grad_norm": 0.5667618602514267, "loss": 0.4600092202425003, "time": 1.5727506399154663, "epoch": 477, "memory": 35394, "step": 149288}
{"lr": 0.0005857447515747919, "data_time": 0.5606072664260864, "grad_norm": 0.47063118517398833, "loss": 0.4626242071390152, "time": 1.5289234399795533, "epoch": 478, "memory": 35394, "step": 149401}
{"lr": 0.0005847530957489131, "data_time": 0.2965078353881836, "grad_norm": 0.5411208480596542, "loss": 0.4585691839456558, "time": 1.5224413633346559, "epoch": 478, "memory": 35394, "step": 149501}
{"lr": 0.0005837617963402879, "data_time": 0.002776479721069336, "grad_norm": 0.5358022511005401, "loss": 0.4628957897424698, "time": 1.440147304534912, "epoch": 478, "memory": 35394, "step": 149601}
{"lr": 0.0005826420589998592, "data_time": 0.4376487731933594, "grad_norm": 0.5029445350170135, "loss": 0.4581578433513641, "time": 1.5236953020095825, "epoch": 479, "memory": 35394, "step": 149714}
{"lr": 0.0005816515242309182, "data_time": 0.14151351451873778, "grad_norm": 0.51718869805336, "loss": 0.4574394732713699, "time": 1.5302005290985108, "epoch": 479, "memory": 35394, "step": 149814}
{"lr": 0.0005806613510149763, "data_time": 0.002397489547729492, "grad_norm": 0.5522157222032547, "loss": 0.46125592291355133, "time": 1.4843685626983643, "epoch": 479, "memory": 35394, "step": 149914}
{"lr": 0.0005795428924505526, "data_time": 0.6112150907516479, "grad_norm": 0.5095450431108475, "loss": 0.46050040423870087, "time": 1.5529227018356324, "epoch": 480, "memory": 35394, "step": 150027}
{"lr": 0.0005785534948072088, "data_time": 0.32810814380645753, "grad_norm": 0.5263363301753998, "loss": 0.45947431921958926, "time": 1.5501117944717406, "epoch": 480, "memory": 35394, "step": 150127}
{"lr": 0.0005775644638467458, "data_time": 0.22695391178131102, "grad_norm": 0.5664246261119843, "loss": 0.46253606379032136, "time": 1.5009998321533202, "epoch": 480, "memory": 35394, "step": 150227}
{"lr": 0.0005764473022021956, "data_time": 0.5331809997558594, "grad_norm": 0.46482665538787843, "loss": 0.4641199380159378, "time": 1.5441081762313842, "epoch": 481, "memory": 35394, "step": 150340}
{"lr": 0.0005754590577346632, "data_time": 0.1943070888519287, "grad_norm": 0.5136082470417023, "loss": 0.45533864200115204, "time": 1.5303963899612427, "epoch": 481, "memory": 35394, "step": 150440}
{"lr": 0.0005744711850739454, "data_time": 0.003175234794616699, "grad_norm": 0.5346131771802902, "loss": 0.4657626271247864, "time": 1.5402819156646728, "epoch": 481, "memory": 35394, "step": 150540}
{"lr": 0.0005733553384720955, "data_time": 0.5879107236862182, "grad_norm": 0.5808610200881958, "loss": 0.462847563624382, "time": 1.5268754005432128, "epoch": 482, "memory": 35394, "step": 150653}
{"lr": 0.0005723682632118817, "data_time": 0.28968403339385984, "grad_norm": 0.5165884524583817, "loss": 0.46716209352016447, "time": 1.4864904165267945, "epoch": 482, "memory": 35394, "step": 150753}
{"lr": 0.0005713815648763834, "data_time": 0.10395543575286866, "grad_norm": 0.5064650028944016, "loss": 0.4630066454410553, "time": 1.6756654500961303, "epoch": 482, "memory": 35394, "step": 150853}
{"lr": 0.0005702670514187283, "data_time": 0.3549320220947266, "grad_norm": 0.5257883250713349, "loss": 0.46376236379146574, "time": 1.4849421977996826, "epoch": 483, "memory": 35394, "step": 150966}
{"lr": 0.0005692811613783723, "data_time": 0.1659482717514038, "grad_norm": 0.5633630663156509, "loss": 0.45622317492961884, "time": 1.5119279623031616, "epoch": 483, "memory": 35394, "step": 151066}
{"lr": 0.0005682956533745206, "data_time": 0.0030648231506347655, "grad_norm": 0.5982740819454193, "loss": 0.46361383497715, "time": 1.5560280561447144, "epoch": 483, "memory": 35394, "step": 151166}
{"lr": 0.0005671824911409304, "data_time": 0.3751641035079956, "grad_norm": 0.5536355286836624, "loss": 0.4638142615556717, "time": 1.5683090448379517, "epoch": 484, "memory": 35394, "step": 151279}
{"lr": 0.0005661978023137444, "data_time": 0.18420546054840087, "grad_norm": NaN, "loss": 0.4588893800973892, "time": 1.5074984312057496, "epoch": 484, "memory": 35394, "step": 151379}
{"lr": 0.0005652135006286523, "data_time": 0.003213357925415039, "grad_norm": 0.5020644158124924, "loss": 0.4638948023319244, "time": 1.4910431146621703, "epoch": 484, "memory": 35394, "step": 151479}
{"lr": 0.0005641017076770748, "data_time": 0.06551260948181152, "grad_norm": 0.5599041193723678, "loss": 0.4610550343990326, "time": 1.2407179594039917, "epoch": 485, "memory": 35394, "step": 151592}
{"lr": 0.0005631182360368874, "data_time": 0.0030080795288085936, "grad_norm": 0.592026886343956, "loss": 0.46317499279975893, "time": 1.5598883628845215, "epoch": 485, "memory": 35394, "step": 151692}
{"lr": 0.0005621351566381029, "data_time": 0.0030719280242919923, "grad_norm": 0.6008357524871826, "loss": 0.45761704742908477, "time": 1.5036805152893067, "epoch": 485, "memory": 35394, "step": 151792}
{"lr": 0.0005610247510042744, "data_time": 0.5535117149353027, "grad_norm": 0.5296362787485123, "loss": 0.46249642968177795, "time": 1.5499426126480103, "epoch": 486, "memory": 35394, "step": 151905}
{"lr": 0.0005600425125051645, "data_time": 0.1263885021209717, "grad_norm": 0.536565688252449, "loss": 0.457360053062439, "time": 1.5590749025344848, "epoch": 486, "memory": 35394, "step": 152005}
{"lr": 0.0005590606713404045, "data_time": 0.0029182195663452148, "grad_norm": 0.6204862117767334, "loss": 0.4663865566253662, "time": 1.5604133129119873, "epoch": 486, "memory": 35394, "step": 152105}
{"lr": 0.0005579516710375552, "data_time": 0.5601376295089722, "grad_norm": 0.538968563079834, "loss": 0.4629732608795166, "time": 1.4856053113937377, "epoch": 487, "memory": 35394, "step": 152218}
{"lr": 0.0005569706816136005, "data_time": 0.36064040660858154, "grad_norm": 0.4982598513364792, "loss": 0.45887974798679354, "time": 1.539495873451233, "epoch": 487, "memory": 35394, "step": 152318}
{"lr": 0.0005559900946104958, "data_time": 0.003737950325012207, "grad_norm": 0.6285032570362091, "loss": 0.46043393313884734, "time": 1.5571807384490968, "epoch": 487, "memory": 35394, "step": 152418}
{"lr": 0.0005548825176290598, "data_time": 0.48322784900665283, "grad_norm": 0.47279633283615113, "loss": 0.45658910274505615, "time": 1.5296099424362182, "epoch": 488, "memory": 35394, "step": 152531}
{"lr": 0.0005539027931940744, "data_time": 0.23497798442840576, "grad_norm": 0.5914790272712708, "loss": 0.4564270734786987, "time": 1.5365944862365724, "epoch": 488, "memory": 35394, "step": 152631}
{"lr": 0.0005529234762599117, "data_time": 0.002411675453186035, "grad_norm": 0.5224710553884506, "loss": 0.45935442447662356, "time": 1.5261783599853516, "epoch": 488, "memory": 35394, "step": 152731}
{"lr": 0.0005518173405672318, "data_time": 0.4651197910308838, "grad_norm": 0.5574983388185502, "loss": 0.46188009083271026, "time": 1.573249888420105, "epoch": 489, "memory": 35394, "step": 152844}
{"lr": 0.0005508388970145086, "data_time": 0.11489701271057129, "grad_norm": 0.5987694233655929, "loss": 0.46131073832511904, "time": 1.4201719760894775, "epoch": 489, "memory": 35394, "step": 152944}
{"lr": 0.0005498608660359705, "data_time": 0.0030434846878051756, "grad_norm": 0.5695277988910675, "loss": 0.45799009799957274, "time": 1.5320436477661132, "epoch": 489, "memory": 35394, "step": 153044}
{"lr": 0.0005487561895760097, "data_time": 0.5679436445236206, "grad_norm": 0.5341174811124801, "loss": 0.4569701462984085, "time": 1.4999753475189208, "epoch": 490, "memory": 35394, "step": 153157}
{"lr": 0.0005477790427780644, "data_time": 0.13051080703735352, "grad_norm": 0.5778803378343582, "loss": 0.4587729275226593, "time": 1.5608603477478027, "epoch": 490, "memory": 35394, "step": 153257}
{"lr": 0.0005468023136209723, "data_time": 0.0030326366424560545, "grad_norm": 0.539031234383583, "loss": 0.45704467594623566, "time": 1.501146912574768, "epoch": 490, "memory": 35394, "step": 153357}
{"lr": 0.0005456991143140209, "data_time": 0.43182289600372314, "grad_norm": 0.6053309231996536, "loss": 0.4581709265708923, "time": 1.5367366313934325, "epoch": 491, "memory": 35394, "step": 153470}
{"lr": 0.0005447232801223302, "data_time": 0.049549651145935056, "grad_norm": 0.46497112810611724, "loss": 0.463441064953804, "time": 1.5309473037719727, "epoch": 491, "memory": 35394, "step": 153570}
{"lr": 0.0005437478686313871, "data_time": 0.003130030632019043, "grad_norm": 0.5682653397321701, "loss": 0.45440171360969545, "time": 1.5461554050445556, "epoch": 491, "memory": 35394, "step": 153670}
{"lr": 0.0005426461643737732, "data_time": 0.4594928741455078, "grad_norm": 0.5356595009565354, "loss": 0.4611297190189362, "time": 1.503948163986206, "epoch": 492, "memory": 35394, "step": 153783}
{"lr": 0.0005416716586185216, "data_time": 0.19142239093780516, "grad_norm": 0.5050959497690201, "loss": 0.4583604037761688, "time": 1.5086573839187623, "epoch": 492, "memory": 35394, "step": 153883}
{"lr": 0.0005406975806170557, "data_time": 0.004126644134521485, "grad_norm": 0.5234371691942215, "loss": 0.4615045845508575, "time": 1.5574122428894044, "epoch": 492, "memory": 35394, "step": 153983}
{"lr": 0.0005395973892808557, "data_time": 0.3474008798599243, "grad_norm": 0.5006029725074768, "loss": 0.4648973286151886, "time": 1.6069084882736206, "epoch": 493, "memory": 35394, "step": 154096}
{"lr": 0.0005386242277706793, "data_time": 0.20287158489227294, "grad_norm": 0.5349254429340362, "loss": 0.46415560245513915, "time": 1.5565101385116578, "epoch": 493, "memory": 35394, "step": 154196}
{"lr": 0.0005376514990603836, "data_time": 0.002984952926635742, "grad_norm": 0.5740128695964813, "loss": 0.4590358644723892, "time": 1.5330979347229003, "epoch": 493, "memory": 35394, "step": 154296}
{"lr": 0.0005365528384931286, "data_time": 0.4312122344970703, "grad_norm": 0.5672017484903336, "loss": 0.45925294458866117, "time": 1.5685471296310425, "epoch": 494, "memory": 35394, "step": 154409}
{"lr": 0.0005355810370148541, "data_time": 0.08004193305969239, "grad_norm": 0.5572786718606949, "loss": 0.4563960939645767, "time": 1.5512653827667235, "epoch": 494, "memory": 35394, "step": 154509}
{"lr": 0.0005346096733755388, "data_time": 0.003174161911010742, "grad_norm": 0.5049933582544327, "loss": 0.4635860979557037, "time": 1.519587230682373, "epoch": 494, "memory": 35394, "step": 154609}
{"lr": 0.0005335125613999292, "data_time": 0.42724032402038575, "grad_norm": 0.532324680685997, "loss": 0.4640538841485977, "time": 1.5301548480987548, "epoch": 495, "memory": 35394, "step": 154722}
{"lr": 0.0005325421357183216, "data_time": 0.15294604301452636, "grad_norm": 0.5103588700294495, "loss": 0.458675342798233, "time": 1.5518003940582275, "epoch": 495, "memory": 35394, "step": 154822}
{"lr": 0.0005315721529076471, "data_time": 0.0029221057891845705, "grad_norm": 0.5934849470853806, "loss": 0.4648789942264557, "time": 1.5757399797439575, "epoch": 495, "memory": 35394, "step": 154922}
{"lr": 0.0005304766073212597, "data_time": 0.4820857048034668, "grad_norm": 0.5921984612941742, "loss": 0.458652526140213, "time": 1.5868629693984986, "epoch": 496, "memory": 35394, "step": 155035}
{"lr": 0.0005295075731787664, "data_time": 0.18314032554626464, "grad_norm": 0.5794021040201187, "loss": 0.45575565099716187, "time": 1.709017038345337, "epoch": 496, "memory": 35394, "step": 155135}
{"lr": 0.0005285389869319961, "data_time": 0.003087019920349121, "grad_norm": 0.5827316075563431, "loss": 0.46135830581188203, "time": 1.5281165599823, "epoch": 496, "memory": 35394, "step": 155235}
{"lr": 0.0005274450255069989, "data_time": 0.5124651908874511, "grad_norm": 0.5473931640386581, "loss": 0.4588864237070084, "time": 1.5649043798446656, "epoch": 497, "memory": 35394, "step": 155348}
{"lr": 0.0005264773986234926, "data_time": 0.17707428932189942, "grad_norm": 0.5356500327587128, "loss": 0.45761882066726683, "time": 1.558671236038208, "epoch": 497, "memory": 35394, "step": 155448}
{"lr": 0.0005255102246532335, "data_time": 0.0032052993774414062, "grad_norm": 0.5721443325281144, "loss": 0.46212306022644045, "time": 1.5348662376403808, "epoch": 497, "memory": 35394, "step": 155548}
{"lr": 0.0005244178651360967, "data_time": 0.192303466796875, "grad_norm": 0.6118470460176468, "loss": 0.4589479684829712, "time": 1.3014684677124024, "epoch": 498, "memory": 35394, "step": 155661}
{"lr": 0.0005234516612086219, "data_time": 0.2526037931442261, "grad_norm": 0.6114307135343552, "loss": 0.4571203917264938, "time": 1.529236912727356, "epoch": 498, "memory": 35394, "step": 155761}
{"lr": 0.0005224859152045709, "data_time": 0.049369287490844724, "grad_norm": 0.5509806334972381, "loss": 0.4592346906661987, "time": 1.524350357055664, "epoch": 498, "memory": 35394, "step": 155861}
{"lr": 0.0005213951753157769, "data_time": 0.5610334157943726, "grad_norm": 0.5899746805429459, "loss": 0.45857113897800444, "time": 1.5002373933792115, "epoch": 499, "memory": 35394, "step": 155974}
{"lr": 0.0005204304100182949, "data_time": 0.37409045696258547, "grad_norm": 0.6172028541564941, "loss": 0.4685505419969559, "time": 1.5381744146347045, "epoch": 499, "memory": 35394, "step": 156074}
{"lr": 0.0005194661076469842, "data_time": 0.0030648469924926757, "grad_norm": 0.5432569772005081, "loss": 0.46283058226108553, "time": 1.646781611442566, "epoch": 499, "memory": 35394, "step": 156174}
{"lr": 0.0005183770050807421, "data_time": 0.3113562822341919, "grad_norm": 0.5680852502584457, "loss": 0.45721822082996366, "time": 1.530108904838562, "epoch": 500, "memory": 35394, "step": 156287}
{"lr": 0.0005174136940638746, "data_time": 0.0020751953125, "grad_norm": 0.5976984024047851, "loss": 0.4582570940256119, "time": 1.577265977859497, "epoch": 500, "memory": 35394, "step": 156387}
{"lr": 0.0005164508509684164, "data_time": 0.0026290416717529297, "grad_norm": 0.5553428679704666, "loss": 0.45900127589702605, "time": 1.5196784496307374, "epoch": 500, "memory": 35394, "step": 156487}
{"lr": 0.0005153634033923735, "data_time": 0.2608260869979858, "grad_norm": 0.5606660068035125, "loss": 0.4588764250278473, "time": 1.5113454341888428, "epoch": 501, "memory": 35394, "step": 156600}
{"lr": 0.0005144015622831511, "data_time": 0.002613067626953125, "grad_norm": 0.5385122478008271, "loss": 0.45291181802749636, "time": 1.4721307039260865, "epoch": 501, "memory": 35394, "step": 156700}
{"lr": 0.0005134401940829868, "data_time": 0.0033372879028320313, "grad_norm": 0.4804727166891098, "loss": 0.4590806424617767, "time": 1.5128250598907471, "epoch": 501, "memory": 35394, "step": 156800}
{"lr": 0.0005123544191379454, "data_time": 0.5014004945755005, "grad_norm": 0.5436036467552186, "loss": 0.46082858741283417, "time": 1.5744711637496949, "epoch": 502, "memory": 35394, "step": 156913}
{"lr": 0.0005113940635395553, "data_time": 0.012587189674377441, "grad_norm": 0.5309867799282074, "loss": 0.457215690612793, "time": 1.5246013879776001, "epoch": 502, "memory": 35394, "step": 157013}
{"lr": 0.0005104341858301999, "data_time": 0.0029270410537719726, "grad_norm": 0.514956745505333, "loss": 0.4535168468952179, "time": 1.495697569847107, "epoch": 502, "memory": 35394, "step": 157113}
{"lr": 0.0005093501011298238, "data_time": 0.5413076877593994, "grad_norm": 0.5195391297340393, "loss": 0.45666311085224154, "time": 1.5513487100601195, "epoch": 503, "memory": 35394, "step": 157226}
{"lr": 0.0005083912466213542, "data_time": 0.0512681245803833, "grad_norm": 0.5349285036325455, "loss": 0.458283269405365, "time": 1.5762327432632446, "epoch": 503, "memory": 35394, "step": 157326}
{"lr": 0.0005074328749741414, "data_time": 0.0029040098190307615, "grad_norm": 0.5741516768932342, "loss": 0.45963424146175386, "time": 1.581451654434204, "epoch": 503, "memory": 35394, "step": 157426}
{"lr": 0.0005063504981046784, "data_time": 0.5022867918014526, "grad_norm": 0.5319397628307343, "loss": 0.46190951466560365, "time": 1.506193470954895, "epoch": 504, "memory": 35394, "step": 157539}
{"lr": 0.0005053931602408654, "data_time": 0.26197185516357424, "grad_norm": 0.5479814887046814, "loss": 0.4547120273113251, "time": 1.517871618270874, "epoch": 504, "memory": 35394, "step": 157639}
{"lr": 0.0005044363102027007, "data_time": 0.0033188819885253905, "grad_norm": 0.6046606123447418, "loss": 0.4612723082304001, "time": 1.3888951301574708, "epoch": 504, "memory": 35394, "step": 157739}
{"lr": 0.000503355658722694, "data_time": 0.5217068672180176, "grad_norm": 0.5501616984605789, "loss": 0.4592925816774368, "time": 1.5517553806304931, "epoch": 505, "memory": 35394, "step": 157852}
{"lr": 0.0005023998530336704, "data_time": 0.30165092945098876, "grad_norm": 0.627439308166504, "loss": 0.4570551455020905, "time": 1.536686372756958, "epoch": 505, "memory": 35394, "step": 157952}
{"lr": 0.0005014445401267726, "data_time": 0.0026512622833251955, "grad_norm": 0.5474669188261032, "loss": 0.45613881945610046, "time": 1.5015605449676515, "epoch": 505, "memory": 35394, "step": 158052}
{"lr": 0.000500365631566774, "data_time": 0.09397952556610108, "grad_norm": 0.5629245668649674, "loss": 0.46534491777420045, "time": 1.5869518756866454, "epoch": 506, "memory": 35394, "step": 158165}
{"lr": 0.0004994113735578193, "data_time": 0.0021622896194458006, "grad_norm": 0.6077363610267639, "loss": 0.4653900980949402, "time": 1.3438088417053222, "epoch": 506, "memory": 35394, "step": 158265}
{"lr": 0.0004984576132794731, "data_time": 0.003852081298828125, "grad_norm": 0.5892728090286254, "loss": 0.4560708671808243, "time": 1.5610679864883423, "epoch": 506, "memory": 35394, "step": 158365}
{"lr": 0.0004973804651417609, "data_time": 0.5097151279449463, "grad_norm": 0.5595000654458999, "loss": 0.45517794191837313, "time": 1.7473594665527343, "epoch": 507, "memory": 35394, "step": 158478}
{"lr": 0.0004964277702930466, "data_time": 0.3326703071594238, "grad_norm": 0.6270623445510864, "loss": 0.453777015209198, "time": 1.6078380346298218, "epoch": 507, "memory": 35394, "step": 158578}
{"lr": 0.0004954755781153499, "data_time": 0.13898885250091553, "grad_norm": 0.4893502563238144, "loss": 0.4600953429937363, "time": 1.5757518053054809, "epoch": 507, "memory": 35394, "step": 158678}
{"lr": 0.0004944002078736446, "data_time": 0.4451582908630371, "grad_norm": 0.5187349587678909, "loss": 0.46029279828071595, "time": 1.4865086555480957, "epoch": 508, "memory": 35394, "step": 158791}
{"lr": 0.0004934490916399837, "data_time": 0.38446483612060545, "grad_norm": 0.5799892663955688, "loss": 0.45970989763736725, "time": 1.7173212051391602, "epoch": 508, "memory": 35394, "step": 158891}
{"lr": 0.0004924984830095934, "data_time": 0.0031171560287475584, "grad_norm": 0.5966295003890991, "loss": 0.45291672050952914, "time": 1.5767140150070191, "epoch": 508, "memory": 35394, "step": 158991}
{"lr": 0.0004914249081087753, "data_time": 0.8507306575775146, "grad_norm": 0.5843822747468949, "loss": 0.46114454567432406, "time": 2.970307159423828, "epoch": 509, "memory": 35394, "step": 159104}
{"lr": 0.0004904753859193723, "data_time": 0.3613182783126831, "grad_norm": 0.6435741513967514, "loss": 0.4579773128032684, "time": 3.210129904747009, "epoch": 509, "memory": 35394, "step": 159204}
{"lr": 0.0004895263762572609, "data_time": 0.0032922983169555663, "grad_norm": 0.5850054740905761, "loss": 0.4582098424434662, "time": 3.0440871953964233, "epoch": 509, "memory": 35394, "step": 159304}
{"lr": 0.0004884546141130843, "data_time": 2.7474007844924926, "grad_norm": 0.5631227135658264, "loss": 0.46448039412498476, "time": 4.155089974403381, "epoch": 510, "memory": 35394, "step": 159417}
{"lr": 0.00048750670137128615, "data_time": 0.9394108295440674, "grad_norm": 0.5231180697679519, "loss": 0.4547161817550659, "time": 3.300037908554077, "epoch": 510, "memory": 35394, "step": 159517}
{"lr": 0.00048655930607248535, "data_time": 0.002499294281005859, "grad_norm": 0.5901965975761414, "loss": 0.4617908239364624, "time": 2.8115094900131226, "epoch": 510, "memory": 35394, "step": 159617}
{"lr": 0.0004854893740712972, "data_time": 1.6587432384490968, "grad_norm": 0.5733870327472687, "loss": 0.4604768007993698, "time": 2.983690595626831, "epoch": 511, "memory": 35394, "step": 159730}
{"lr": 0.00048454308615434097, "data_time": 3.477441143989563, "grad_norm": 0.5937664657831192, "loss": 0.4568196773529053, "time": 5.647061705589294, "epoch": 511, "memory": 35394, "step": 159830}
{"lr": 0.0004835973205876925, "data_time": 0.30270140171051024, "grad_norm": 0.5688829392194747, "loss": 0.45789078176021575, "time": 3.3921017169952394, "epoch": 511, "memory": 35394, "step": 159930}
{"lr": 0.0004825292360861529, "data_time": 1.8749914646148682, "grad_norm": 0.5745700478553772, "loss": 0.460272353887558, "time": 3.3532898902893065, "epoch": 512, "memory": 35394, "step": 160043}
{"lr": 0.00048158458834491686, "data_time": 1.0043655157089233, "grad_norm": 0.5375511109828949, "loss": 0.45932371020317075, "time": 3.2602980852127077, "epoch": 512, "memory": 35394, "step": 160143}
{"lr": 0.0004806404678528272, "data_time": 0.3387583255767822, "grad_norm": 0.5444638669490814, "loss": 0.4579779088497162, "time": 3.7073358297348022, "epoch": 512, "memory": 35394, "step": 160243}
{"lr": 0.0004795742481776245, "data_time": 1.9921919107437134, "grad_norm": 0.5790921598672867, "loss": 0.46511963605880735, "time": 3.492551898956299, "epoch": 513, "memory": 35394, "step": 160356}
{"lr": 0.00047863125593638074, "data_time": 1.175442600250244, "grad_norm": 0.5680066406726837, "loss": 0.4623746633529663, "time": 3.545464110374451, "epoch": 513, "memory": 35394, "step": 160456}
{"lr": 0.00047768879583456793, "data_time": 0.0032341718673706056, "grad_norm": 0.5369529068470001, "loss": 0.46046155095100405, "time": 3.1179643630981446, "epoch": 513, "memory": 35394, "step": 160556}
{"lr": 0.0004766244582821388, "data_time": 1.842547345161438, "grad_norm": 0.5379068940877915, "loss": 0.4629140555858612, "time": 3.1010776281356813, "epoch": 514, "memory": 35394, "step": 160669}
{"lr": 0.00047568313683830313, "data_time": 0.41162910461425783, "grad_norm": 0.6502853512763977, "loss": 0.4629322201013565, "time": 1.753369975090027, "epoch": 514, "memory": 35394, "step": 160769}
{"lr": 0.00047474235241555047, "data_time": 0.002177262306213379, "grad_norm": 0.569971427321434, "loss": 0.45891897976398466, "time": 1.6527966260910034, "epoch": 514, "memory": 35394, "step": 160869}
{"lr": 0.00047367991425179945, "data_time": 0.5697051763534546, "grad_norm": 0.5624025881290435, "loss": 0.4575932830572128, "time": 1.6969816923141479, "epoch": 515, "memory": 35394, "step": 160982}
{"lr": 0.0004727402788756844, "data_time": 0.11967153549194336, "grad_norm": 0.5604249536991119, "loss": 0.4558804750442505, "time": 1.575979471206665, "epoch": 515, "memory": 35394, "step": 161082}
{"lr": 0.00047180118539359207, "data_time": 0.00325314998626709, "grad_norm": 0.5897705167531967, "loss": 0.4577422797679901, "time": 1.6036107063293457, "epoch": 515, "memory": 35394, "step": 161182}
{"lr": 0.00047074066385361144, "data_time": 0.2609894275665283, "grad_norm": 0.624432361125946, "loss": 0.46022290289402007, "time": 1.594050645828247, "epoch": 516, "memory": 35394, "step": 161295}
{"lr": 0.0004698027297881759, "data_time": 0.0695605754852295, "grad_norm": 0.571283933520317, "loss": 0.46553085148334505, "time": 1.5724658489227294, "epoch": 516, "memory": 35394, "step": 161395}
{"lr": 0.00046886534248091337, "data_time": 0.0030626296997070313, "grad_norm": 0.5926025062799454, "loss": 0.45699886977672577, "time": 1.5531309366226196, "epoch": 516, "memory": 35394, "step": 161495}
{"lr": 0.000467806754768704, "data_time": 0.4447868824005127, "grad_norm": 0.6207335293292999, "loss": 0.46785156726837157, "time": 1.554833173751831, "epoch": 517, "memory": 35394, "step": 161608}
{"lr": 0.0004668705372293098, "data_time": 0.00239865779876709, "grad_norm": 0.6097514420747757, "loss": 0.4559932738542557, "time": 1.5116185426712037, "epoch": 517, "memory": 35394, "step": 161708}
{"lr": 0.0004659348713033685, "data_time": 0.0025194883346557617, "grad_norm": 0.6403893321752548, "loss": 0.46002067923545836, "time": 1.6269113302230835, "epoch": 517, "memory": 35394, "step": 161808}
{"lr": 0.00046487823459155967, "data_time": 0.5953478574752807, "grad_norm": 0.5889110833406448, "loss": 0.4600794315338135, "time": 1.53009774684906, "epoch": 518, "memory": 35394, "step": 161921}
{"lr": 0.00046394374876572187, "data_time": 0.41346230506896975, "grad_norm": 0.6157020479440689, "loss": 0.45116843581199645, "time": 1.5334437608718872, "epoch": 518, "memory": 35394, "step": 162021}
{"lr": 0.00046300981939966835, "data_time": 0.10327012538909912, "grad_norm": 0.5623261362314225, "loss": 0.4562139749526978, "time": 1.5101515054702759, "epoch": 518, "memory": 35394, "step": 162121}
{"lr": 0.0004619551508292403, "data_time": 0.36790492534637453, "grad_norm": 0.5813279896974564, "loss": 0.4648825407028198, "time": 1.56415114402771, "epoch": 519, "memory": 35394, "step": 162234}
{"lr": 0.00046102241187638115, "data_time": 0.003400588035583496, "grad_norm": 0.5699620336294174, "loss": 0.4617522269487381, "time": 1.5269075393676759, "epoch": 519, "memory": 35394, "step": 162334}
{"lr": 0.000460090234220612, "data_time": 0.004899668693542481, "grad_norm": 0.6245952606201172, "loss": 0.46123968064785004, "time": 1.5205024003982544, "epoch": 519, "memory": 35394, "step": 162434}
{"lr": 0.00045903755090061617, "data_time": 0.5372252225875854, "grad_norm": 0.6377789497375488, "loss": 0.4588848382234573, "time": 1.5333790063858033, "epoch": 520, "memory": 35394, "step": 162547}
{"lr": 0.00045810657395182206, "data_time": 0.15473086833953859, "grad_norm": 0.6027052670717239, "loss": 0.45867170095443727, "time": 1.5343554735183715, "epoch": 520, "memory": 35394, "step": 162647}
{"lr": 0.00045717616312831614, "data_time": 0.0031595230102539062, "grad_norm": 0.6175327599048615, "loss": 0.45725384056568147, "time": 1.507278037071228, "epoch": 520, "memory": 35394, "step": 162747}
{"lr": 0.0004561254821356005, "data_time": 0.5597583770751953, "grad_norm": 0.5315187007188797, "loss": 0.45652406513690946, "time": 1.500854992866516, "epoch": 521, "memory": 35394, "step": 162860}
{"lr": 0.00045519628229337113, "data_time": 0.533278226852417, "grad_norm": 0.6503793358802795, "loss": 0.4545745670795441, "time": 1.5630169630050659, "epoch": 521, "memory": 35394, "step": 162960}
{"lr": 0.00045426765339544583, "data_time": 0.0033704042434692383, "grad_norm": 0.5897072046995163, "loss": 0.46038762032985686, "time": 1.569338321685791, "epoch": 521, "memory": 35394, "step": 163060}
{"lr": 0.0004532189917743762, "data_time": 0.5741101741790772, "grad_norm": 0.5501600086688996, "loss": 0.4628905147314072, "time": 1.5736200332641601, "epoch": 522, "memory": 35394, "step": 163173}
{"lr": 0.0004522915841123845, "data_time": 0.36529173851013186, "grad_norm": 0.5362219333648681, "loss": 0.4615465044975281, "time": 1.5537465572357179, "epoch": 522, "memory": 35394, "step": 163273}
{"lr": 0.0004513647522044514, "data_time": 0.02028317451477051, "grad_norm": 0.5303890377283096, "loss": 0.45958341658115387, "time": 1.5846296787261962, "epoch": 522, "memory": 35394, "step": 163373}
{"lr": 0.000450318126966634, "data_time": 0.42580626010894773, "grad_norm": 0.5559818327426911, "loss": 0.4614006966352463, "time": 1.6230627059936524, "epoch": 523, "memory": 35394, "step": 163486}
{"lr": 0.0004493925265294805, "data_time": 0.08009936809539794, "grad_norm": 0.5925600469112396, "loss": 0.4552618443965912, "time": 1.5584106683731078, "epoch": 523, "memory": 35394, "step": 163586}
{"lr": 0.000448467506646798, "data_time": 0.0032922983169555663, "grad_norm": 0.5334570795297623, "loss": 0.45697190761566164, "time": 1.5710123777389526, "epoch": 523, "memory": 35394, "step": 163686}
{"lr": 0.0004474229347708036, "data_time": 0.3834860324859619, "grad_norm": 0.6008316189050674, "loss": 0.4614873379468918, "time": 1.6039804458618163, "epoch": 524, "memory": 35394, "step": 163799}
{"lr": 0.00044649915657376984, "data_time": 0.10677640438079834, "grad_norm": 0.574452418088913, "loss": 0.45891413986682894, "time": 1.541370677947998, "epoch": 524, "memory": 35394, "step": 163899}
{"lr": 0.00044557596372220276, "data_time": 0.0024126529693603515, "grad_norm": 0.625300282239914, "loss": 0.46030505299568175, "time": 1.5358513355255128, "epoch": 524, "memory": 35394, "step": 163999}
{"lr": 0.0004445334621532942, "data_time": 0.586776852607727, "grad_norm": 0.5922342240810394, "loss": 0.45676793456077575, "time": 1.5636883497238159, "epoch": 525, "memory": 35394, "step": 164112}
{"lr": 0.0004436115211821028, "data_time": 0.14520866870880128, "grad_norm": 0.5434729933738709, "loss": 0.46640354096889497, "time": 1.548713207244873, "epoch": 525, "memory": 35394, "step": 164212}
{"lr": 0.00044269017033787773, "data_time": 0.0031110525131225588, "grad_norm": 0.597137176990509, "loss": 0.46528729498386384, "time": 1.5753831624984742, "epoch": 525, "memory": 35394, "step": 164312}
{"lr": 0.0004416497559877314, "data_time": 0.5026307344436646, "grad_norm": 0.551817125082016, "loss": 0.4571268528699875, "time": 1.5401925325393677, "epoch": 526, "memory": 35394, "step": 164425}
{"lr": 0.00044072966719829884, "data_time": 0.18262624740600586, "grad_norm": 0.5844446390867233, "loss": 0.4520966559648514, "time": 1.5593289852142334, "epoch": 526, "memory": 35394, "step": 164525}
{"lr": 0.0004398101733077599, "data_time": 0.0028194189071655273, "grad_norm": 0.538659143447876, "loss": 0.4604122281074524, "time": 1.5595536470413207, "epoch": 526, "memory": 35394, "step": 164625}
{"lr": 0.00043877186305419367, "data_time": 0.552769947052002, "grad_norm": 0.5652240455150604, "loss": 0.46242654919624326, "time": 1.5597591876983643, "epoch": 527, "memory": 35394, "step": 164738}
{"lr": 0.00043785364137239203, "data_time": 0.3946515083312988, "grad_norm": 0.5542545914649963, "loss": 0.4572336435317993, "time": 1.5771543264389039, "epoch": 527, "memory": 35394, "step": 164838}
{"lr": 0.0004369360193517605, "data_time": 0.10943892002105712, "grad_norm": 0.5733958154916763, "loss": 0.4565331518650055, "time": 1.5122846364974976, "epoch": 527, "memory": 35394, "step": 164938}
{"lr": 0.00043589983003845844, "data_time": 0.33503859043121337, "grad_norm": 0.53721282184124, "loss": 0.46028787195682525, "time": 1.5631235122680665, "epoch": 528, "memory": 35394, "step": 165051}
{"lr": 0.00043498349035986974, "data_time": 0.0029850006103515625, "grad_norm": 0.6115399062633514, "loss": 0.45936770737171173, "time": 1.562752103805542, "epoch": 528, "memory": 35394, "step": 165151}
{"lr": 0.0004340677550949995, "data_time": 0.0033464431762695312, "grad_norm": 0.5565977901220321, "loss": 0.4586246907711029, "time": 1.574084186553955, "epoch": 528, "memory": 35394, "step": 165251}
{"lr": 0.0004330337035312388, "data_time": 0.4475813150405884, "grad_norm": 0.5972010910511016, "loss": 0.45376384258270264, "time": 1.5803662538528442, "epoch": 529, "memory": 35394, "step": 165364}
{"lr": 0.00043211926072091585, "data_time": 0.24743008613586426, "grad_norm": 0.590008196234703, "loss": 0.4640286684036255, "time": 1.585184907913208, "epoch": 529, "memory": 35394, "step": 165464}
{"lr": 0.00043120542706705527, "data_time": 0.002242279052734375, "grad_norm": 0.5770581424236297, "loss": 0.46067601144313813, "time": 1.5528703212738038, "epoch": 529, "memory": 35394, "step": 165564}
{"lr": 0.0004301735300274364, "data_time": 0.42327237129211426, "grad_norm": 0.6308320790529252, "loss": 0.4585976928472519, "time": 1.6376174449920655, "epoch": 530, "memory": 35394, "step": 165677}
{"lr": 0.0004292609989196608, "data_time": 0.0541438102722168, "grad_norm": 0.6441459804773331, "loss": 0.46336357593536376, "time": 1.564130687713623, "epoch": 530, "memory": 35394, "step": 165777}
{"lr": 0.0004283490817012087, "data_time": 0.002451467514038086, "grad_norm": 0.5518144637346267, "loss": 0.4656200408935547, "time": 1.4829997301101685, "epoch": 530, "memory": 35394, "step": 165877}
{"lr": 0.0004273193559253793, "data_time": 0.3527352809906006, "grad_norm": 0.590070903301239, "loss": 0.46330364644527433, "time": 1.5422115802764893, "epoch": 531, "memory": 35394, "step": 165990}
{"lr": 0.00042640875132341936, "data_time": 0.0023626089096069336, "grad_norm": 0.5025842875242233, "loss": 0.45463272631168367, "time": 1.5416868925094604, "epoch": 531, "memory": 35394, "step": 166090}
{"lr": 0.00042549876533368576, "data_time": 0.0036053180694580076, "grad_norm": 0.5864743947982788, "loss": 0.45629443824291227, "time": 1.6584874153137208, "epoch": 531, "memory": 35394, "step": 166190}
{"lr": 0.0004244712275260707, "data_time": 0.6084090232849121, "grad_norm": 0.5042063474655152, "loss": 0.45952599942684175, "time": 1.5349165678024292, "epoch": 532, "memory": 35394, "step": 166303}
{"lr": 0.00042356256420194313, "data_time": 0.4789325952529907, "grad_norm": 0.6148008644580841, "loss": 0.4531367152929306, "time": 1.5044788122177124, "epoch": 532, "memory": 35394, "step": 166403}
{"lr": 0.0004226545242029103, "data_time": 0.054021024703979494, "grad_norm": 0.6594449758529664, "loss": 0.4546388566493988, "time": 1.5485456228256225, "epoch": 532, "memory": 35394, "step": 166503}
{"lr": 0.00042162919103244125, "data_time": 0.5755913496017456, "grad_norm": 0.5486333996057511, "loss": 0.4608653336763382, "time": 1.6168436765670777, "epoch": 533, "memory": 35394, "step": 166616}
{"lr": 0.00042072248372667184, "data_time": 0.28376624584197996, "grad_norm": 0.5975134283304214, "loss": 0.4672322332859039, "time": 1.5761467933654785, "epoch": 533, "memory": 35394, "step": 166716}
{"lr": 0.0004198164044487532, "data_time": 0.003445887565612793, "grad_norm": 0.6141931861639023, "loss": 0.46053530275821686, "time": 1.5720263242721557, "epoch": 533, "memory": 35394, "step": 166816}
{"lr": 0.0004187932925485971, "data_time": 0.3524838924407959, "grad_norm": 0.5869006723165512, "loss": 0.45842899680137633, "time": 1.530207896232605, "epoch": 534, "memory": 35394, "step": 166929}
{"lr": 0.00041788855596997936, "data_time": 0.3690948486328125, "grad_norm": 0.5898889690637589, "loss": 0.4530953884124756, "time": 1.6209908485412599, "epoch": 534, "memory": 35394, "step": 167029}
{"lr": 0.00041698445211178, "data_time": 0.23883867263793945, "grad_norm": 0.5852943807840347, "loss": 0.4643144369125366, "time": 1.563228178024292, "epoch": 534, "memory": 35394, "step": 167129}
{"lr": 0.0004159635780790713, "data_time": 0.11955413818359376, "grad_norm": 0.6554406642913818, "loss": 0.4572501599788666, "time": 1.3333288431167603, "epoch": 535, "memory": 35394, "step": 167242}
{"lr": 0.00041506082690442934, "data_time": 0.0026194095611572266, "grad_norm": 0.5705272912979126, "loss": 0.46362919807434083, "time": 1.5771879196166991, "epoch": 535, "memory": 35394, "step": 167342}
{"lr": 0.0004141587131325103, "data_time": 0.004994606971740723, "grad_norm": 0.6221666604280471, "loss": 0.4656320005655289, "time": 1.5569756031036377, "epoch": 535, "memory": 35394, "step": 167442}
{"lr": 0.00041314009352807867, "data_time": 0.33720457553863525, "grad_norm": 0.6067762166261673, "loss": 0.4606068730354309, "time": 1.509045171737671, "epoch": 536, "memory": 35394, "step": 167555}
{"lr": 0.0004122393424020308, "data_time": 0.3660029172897339, "grad_norm": 0.573862487077713, "loss": 0.4627677261829376, "time": 1.573280692100525, "epoch": 536, "memory": 35394, "step": 167655}
{"lr": 0.00041133923335066885, "data_time": 0.003491067886352539, "grad_norm": 0.5694041401147842, "loss": 0.46157959699630735, "time": 1.5721940279006958, "epoch": 536, "memory": 35394, "step": 167755}
{"lr": 0.0004103228846987733, "data_time": 0.6154945850372314, "grad_norm": 0.5603195339441299, "loss": 0.4582466036081314, "time": 1.5366836309432983, "epoch": 537, "memory": 35394, "step": 167868}
{"lr": 0.0004094241482334896, "data_time": 0.0029926061630249023, "grad_norm": 0.5302948594093323, "loss": 0.45709786415100095, "time": 1.525183343887329, "epoch": 537, "memory": 35394, "step": 167968}
{"lr": 0.00040852605850443994, "data_time": 0.004353237152099609, "grad_norm": 0.6175556182861328, "loss": 0.4558536857366562, "time": 1.530568265914917, "epoch": 537, "memory": 35394, "step": 168068}
{"lr": 0.0004075119972924997, "data_time": 0.36314566135406495, "grad_norm": 0.557904714345932, "loss": 0.45873580276966097, "time": 1.4477535247802735, "epoch": 538, "memory": 35394, "step": 168181}
{"lr": 0.00040661529006747046, "data_time": 0.20789580345153807, "grad_norm": 0.5914204806089401, "loss": 0.4672317147254944, "time": 1.5949617147445678, "epoch": 538, "memory": 35394, "step": 168281}
{"lr": 0.00040571923422973215, "data_time": 0.0035965681076049806, "grad_norm": 0.5813403069972992, "loss": 0.45786656737327575, "time": 1.6059229373931885, "epoch": 538, "memory": 35394, "step": 168381}
{"lr": 0.00040470747690805837, "data_time": 0.03035264015197754, "grad_norm": 0.5780940115451813, "loss": 0.45322238802909853, "time": 1.5403072595596314, "epoch": 539, "memory": 35394, "step": 168494}
{"lr": 0.0004038128134698545, "data_time": 0.002318286895751953, "grad_norm": 0.6752405047416687, "loss": 0.46040448248386384, "time": 1.490757203102112, "epoch": 539, "memory": 35394, "step": 168594}
{"lr": 0.00040291880605943046, "data_time": 0.0029758453369140626, "grad_norm": 0.5699977487325668, "loss": 0.45643014907836915, "time": 1.6922202587127686, "epoch": 539, "memory": 35394, "step": 168694}
{"lr": 0.00040190936904095957, "data_time": 0.0035473108291625977, "grad_norm": Infinity, "loss": 0.45771671235561373, "time": 1.541384792327881, "epoch": 540, "memory": 35394, "step": 168807}
{"lr": 0.00040101676390299655, "data_time": 0.0030855178833007813, "grad_norm": 0.581960478425026, "loss": 0.4627303659915924, "time": 1.475033974647522, "epoch": 540, "memory": 35394, "step": 168907}
{"lr": 0.0004001248194226609, "data_time": 0.0036232471466064453, "grad_norm": 0.6555616974830627, "loss": 0.464132097363472, "time": 1.4924184799194335, "epoch": 540, "memory": 35394, "step": 169007}
{"lr": 0.00039911771908269036, "data_time": 0.4109588623046875, "grad_norm": 0.5800839930772781, "loss": 0.4624835789203644, "time": 1.5016674280166626, "epoch": 541, "memory": 35394, "step": 169120}
{"lr": 0.0003982271867249939, "data_time": 0.00317530632019043, "grad_norm": 0.6294984221458435, "loss": 0.46711096465587615, "time": 1.5689259052276612, "epoch": 541, "memory": 35394, "step": 169220}
{"lr": 0.0003973373196440552, "data_time": 0.003482556343078613, "grad_norm": 0.6257248193025589, "loss": 0.45659096240997316, "time": 1.5339579820632934, "epoch": 541, "memory": 35394, "step": 169320}
{"lr": 0.0003963325723199757, "data_time": 0.2417588472366333, "grad_norm": 0.6343737989664078, "loss": 0.451917439699173, "time": 1.5139967441558837, "epoch": 542, "memory": 35394, "step": 169433}
{"lr": 0.00039544412718894666, "data_time": 0.0032866954803466796, "grad_norm": 0.5770245283842087, "loss": 0.4579639345407486, "time": 1.5383379697799682, "epoch": 542, "memory": 35394, "step": 169533}
{"lr": 0.00039455635194301337, "data_time": 0.002567148208618164, "grad_norm": 0.6232745528221131, "loss": 0.4609567612409592, "time": 1.5610042333602905, "epoch": 542, "memory": 35394, "step": 169633}
{"lr": 0.00039355397393404424, "data_time": 0.2538708448410034, "grad_norm": 0.5616798341274262, "loss": 0.45398237407207487, "time": 1.5494332790374756, "epoch": 543, "memory": 35394, "step": 169746}
{"lr": 0.0003926676304422246, "data_time": 0.0023700952529907226, "grad_norm": 0.6140070021152496, "loss": 0.45926075875759126, "time": 1.5494198560714723, "epoch": 543, "memory": 35394, "step": 169846}
{"lr": 0.0003917819614329707, "data_time": 0.003385758399963379, "grad_norm": 0.5698108851909638, "loss": 0.4526193827390671, "time": 1.465971875190735, "epoch": 543, "memory": 35394, "step": 169946}
{"lr": 0.0003907819689998959, "data_time": 0.4376526355743408, "grad_norm": 0.5933897137641907, "loss": 0.4578270107507706, "time": 1.6218493938446046, "epoch": 544, "memory": 35394, "step": 170059}
{"lr": 0.00038989774152573285, "data_time": 0.002128148078918457, "grad_norm": 0.6217186868190765, "loss": 0.4554139941930771, "time": 1.552455186843872, "epoch": 544, "memory": 35394, "step": 170159}
{"lr": 0.000389014193120665, "data_time": 0.003245711326599121, "grad_norm": 0.518463084101677, "loss": 0.46039530634880066, "time": 1.5122678518295287, "epoch": 544, "memory": 35394, "step": 170259}
{"lr": 0.0003880166024855705, "data_time": 0.4680896520614624, "grad_norm": 0.5659046739339828, "loss": 0.4591542065143585, "time": 1.5629156351089477, "epoch": 545, "memory": 35394, "step": 170372}
{"lr": 0.0003871345053731857, "data_time": 0.1801426649093628, "grad_norm": 0.6757174730300903, "loss": 0.46696434915065765, "time": 1.5338801622390748, "epoch": 545, "memory": 35394, "step": 170472}
{"lr": 0.0003862530919054092, "data_time": 0.003157186508178711, "grad_norm": 0.5785263061523438, "loss": 0.4524866759777069, "time": 1.5441863536834717, "epoch": 545, "memory": 35394, "step": 170572}
{"lr": 0.00038525791925141627, "data_time": 0.13716740608215333, "grad_norm": 0.5560128033161164, "loss": 0.458545857667923, "time": 1.2394107818603515, "epoch": 546, "memory": 35394, "step": 170685}
{"lr": 0.0003843779668103724, "data_time": 0.0022023200988769533, "grad_norm": 0.6240074425935745, "loss": 0.46201385259628297, "time": 1.5820096492767335, "epoch": 546, "memory": 35394, "step": 170785}
{"lr": 0.00038349870257835966, "data_time": 0.002321171760559082, "grad_norm": 0.5659925550222397, "loss": 0.4577271997928619, "time": 1.5157543420791626, "epoch": 546, "memory": 35394, "step": 170885}
{"lr": 0.00038250596404936567, "data_time": 0.51865553855896, "grad_norm": 0.6337901324033737, "loss": 0.4559400349855423, "time": 1.58132803440094, "epoch": 547, "memory": 35394, "step": 170998}
{"lr": 0.0003816281705544336, "data_time": 0.3556470155715942, "grad_norm": 0.6034791946411133, "loss": 0.45672344267368314, "time": 1.5477179527282714, "epoch": 547, "memory": 35394, "step": 171098}
{"lr": 0.00038075106982179193, "data_time": 0.1074216365814209, "grad_norm": 0.5783869206905365, "loss": 0.46254365146160126, "time": 1.5339880228042602, "epoch": 547, "memory": 35394, "step": 171198}
{"lr": 0.00037976078152220707, "data_time": 0.3744234085083008, "grad_norm": 0.5303193360567093, "loss": 0.4557424634695053, "time": 1.5458199024200439, "epoch": 548, "memory": 35394, "step": 171311}
{"lr": 0.0003788851612131357, "data_time": 0.004927587509155273, "grad_norm": 0.6214347183704376, "loss": 0.46307753920555117, "time": 1.5118167638778686, "epoch": 548, "memory": 35394, "step": 171411}
{"lr": 0.00037801023820837443, "data_time": 0.0051071405410766605, "grad_norm": 0.6628738760948181, "loss": 0.46050947308540346, "time": 1.4998202323913574, "epoch": 548, "memory": 35394, "step": 171511}
{"lr": 0.0003770224162028614, "data_time": 0.2691781044006348, "grad_norm": 0.5537830144166946, "loss": 0.45897920429706573, "time": 1.5358105897903442, "epoch": 549, "memory": 35394, "step": 171624}
{"lr": 0.0003761489832841442, "data_time": 0.002633833885192871, "grad_norm": 0.6316789656877517, "loss": 0.46033309400081635, "time": 1.4899790287017822, "epoch": 549, "memory": 35394, "step": 171724}
{"lr": 0.00037527625220044604, "data_time": 0.0028413772583007813, "grad_norm": 0.5795219749212265, "loss": 0.4633674830198288, "time": 1.5675413131713867, "epoch": 549, "memory": 35394, "step": 171824}
{"lr": 0.0003742909125136582, "data_time": 0.47769601345062257, "grad_norm": 0.5878650605678558, "loss": 0.46049203276634215, "time": 1.630865740776062, "epoch": 550, "memory": 35394, "step": 171937}
{"lr": 0.0003734196811543054, "data_time": 0.018722176551818848, "grad_norm": 0.6301087498664856, "loss": 0.45254174768924715, "time": 1.5359645366668702, "epoch": 550, "memory": 35394, "step": 172037}
{"lr": 0.00037254915614929566, "data_time": 0.002886176109313965, "grad_norm": 0.5746896684169769, "loss": 0.4581951588392258, "time": 1.5234261989593505, "epoch": 550, "memory": 35394, "step": 172137}
{"lr": 0.0003715663147656187, "data_time": 0.5229764699935913, "grad_norm": 0.6052733182907104, "loss": 0.4573037922382355, "time": 1.506078052520752, "epoch": 551, "memory": 35394, "step": 172250}
{"lr": 0.0003706972990989269, "data_time": 0.4503605604171753, "grad_norm": 0.6294073820114136, "loss": 0.45289161801338196, "time": 1.554313635826111, "epoch": 551, "memory": 35394, "step": 172350}
{"lr": 0.0003698289942944422, "data_time": 0.004158186912536621, "grad_norm": 0.5997302830219269, "loss": 0.4636212199926376, "time": 1.5177593946456909, "epoch": 551, "memory": 35394, "step": 172450}
{"lr": 0.0003688486671577324, "data_time": 0.4214385271072388, "grad_norm": 0.5356152355670929, "loss": 0.45801413655281065, "time": 1.5675878047943115, "epoch": 552, "memory": 35394, "step": 172563}
{"lr": 0.0003679818812810534, "data_time": 0.07993488311767578, "grad_norm": 0.6252979785203934, "loss": 0.46164655685424805, "time": 1.4930055141448975, "epoch": 552, "memory": 35394, "step": 172663}
{"lr": 0.00036711581076291476, "data_time": 0.002492809295654297, "grad_norm": 0.559296452999115, "loss": 0.46207743883132935, "time": 1.522247838973999, "epoch": 552, "memory": 35394, "step": 172763}
{"lr": 0.00036613801377624215, "data_time": 0.5841870307922363, "grad_norm": Infinity, "loss": 0.4586715310811996, "time": 1.5269373893737792, "epoch": 553, "memory": 35394, "step": 172876}
{"lr": 0.00036527347175075635, "data_time": 0.14923558235168458, "grad_norm": 0.5668274194002152, "loss": 0.4618526190519333, "time": 1.5390640020370483, "epoch": 553, "memory": 35394, "step": 172976}
{"lr": 0.00036440964956853915, "data_time": 0.0027582645416259766, "grad_norm": 0.623907470703125, "loss": 0.4519419133663177, "time": 1.5115414381027221, "epoch": 553, "memory": 35394, "step": 173076}
{"lr": 0.00036343439859392977, "data_time": 0.4649455785751343, "grad_norm": 0.6241862207651139, "loss": 0.4626448929309845, "time": 1.5606517553329469, "epoch": 554, "memory": 35394, "step": 173189}
{"lr": 0.00036257211444441783, "data_time": 0.24613404273986816, "grad_norm": 0.5824163019657135, "loss": 0.46230279207229613, "time": 1.5559445142745971, "epoch": 554, "memory": 35394, "step": 173289}
{"lr": 0.0003617105546112254, "data_time": 0.003066682815551758, "grad_norm": 0.6166815042495728, "loss": 0.4577228307723999, "time": 1.473702573776245, "epoch": 554, "memory": 35394, "step": 173389}
{"lr": 0.00036073786546940255, "data_time": 0.4909611940383911, "grad_norm": 0.5799437135457992, "loss": 0.45835564136505125, "time": 1.5471317291259765, "epoch": 555, "memory": 35394, "step": 173502}
{"lr": 0.0003598778531840167, "data_time": 0.003103494644165039, "grad_norm": 0.6395867168903351, "loss": 0.4627070128917694, "time": 1.5842172145843505, "epoch": 555, "memory": 35394, "step": 173602}
{"lr": 0.00035901856967625084, "data_time": 0.0026495695114135743, "grad_norm": 0.6090344905853271, "loss": 0.4620056450366974, "time": 1.5710027933120727, "epoch": 555, "memory": 35394, "step": 173702}
{"lr": 0.0003580484581463803, "data_time": 0.5738638401031494, "grad_norm": 0.5517751753330231, "loss": 0.45590385496616365, "time": 1.5132541418075562, "epoch": 556, "memory": 35394, "step": 173815}
{"lr": 0.00035719073167641836, "data_time": 0.22222416400909423, "grad_norm": 0.6075999677181244, "loss": 0.46247269213199615, "time": 1.5339619398117066, "epoch": 556, "memory": 35394, "step": 173915}
{"lr": 0.00035633373843355515, "data_time": 0.003000211715698242, "grad_norm": 0.6665691912174225, "loss": 0.4623124420642853, "time": 1.5566064596176148, "epoch": 556, "memory": 35394, "step": 174015}
{"lr": 0.00035536622025298634, "data_time": 0.586616325378418, "grad_norm": 0.6213989108800888, "loss": 0.4597512662410736, "time": 1.704495120048523, "epoch": 557, "memory": 35394, "step": 174128}
{"lr": 0.00035451079351266486, "data_time": 0.17110857963562012, "grad_norm": 0.5609448313713074, "loss": 0.4642800331115723, "time": 1.5110622882843017, "epoch": 557, "memory": 35394, "step": 174228}
{"lr": 0.00035365610443702624, "data_time": 0.0024786472320556642, "grad_norm": 0.627639651298523, "loss": 0.460750487446785, "time": 1.5841416597366333, "epoch": 557, "memory": 35394, "step": 174328}
{"lr": 0.00035269119530104067, "data_time": 0.3717689037322998, "grad_norm": 0.5242098689079284, "loss": 0.461269810795784, "time": 1.518712043762207, "epoch": 558, "memory": 35394, "step": 174441}
{"lr": 0.00035183808216726967, "data_time": 0.002180647850036621, "grad_norm": 0.5766230195760726, "loss": 0.4599194020032883, "time": 1.5841290473937988, "epoch": 558, "memory": 35394, "step": 174541}
{"lr": 0.0003509857111237991, "data_time": 0.002978348731994629, "grad_norm": 0.606754145026207, "loss": 0.4590109199285507, "time": 1.4994348287582397, "epoch": 558, "memory": 35394, "step": 174641}
{"lr": 0.0003500234266853531, "data_time": 0.4317203760147095, "grad_norm": 0.646055594086647, "loss": 0.4564560353755951, "time": 1.4580473899841309, "epoch": 559, "memory": 35394, "step": 174754}
{"lr": 0.0003491726409975109, "data_time": 0.13464584350585937, "grad_norm": 0.6145693212747574, "loss": 0.46259556114673617, "time": 1.5403179407119751, "epoch": 559, "memory": 35394, "step": 174854}
{"lr": 0.00034832260181354824, "data_time": 0.0026366472244262694, "grad_norm": 0.557951208949089, "loss": 0.45797464847564695, "time": 1.5287021160125733, "epoch": 559, "memory": 35394, "step": 174954}
{"lr": 0.0003473629576830195, "data_time": 0.3728464365005493, "grad_norm": 0.6927371025085449, "loss": 0.4571641743183136, "time": 1.5024106740951537, "epoch": 560, "memory": 35394, "step": 175067}
{"lr": 0.00034651451324272807, "data_time": 0.19312551021575927, "grad_norm": 0.5995288044214249, "loss": 0.45765475630760194, "time": 1.5193102836608887, "epoch": 560, "memory": 35394, "step": 175167}
{"lr": 0.0003456668197077855, "data_time": 0.0026409387588500976, "grad_norm": 0.6873319387435913, "loss": 0.4597239136695862, "time": 1.5273513078689576, "epoch": 560, "memory": 35394, "step": 175267}
{"lr": 0.0003447098314527201, "data_time": 0.4589502811431885, "grad_norm": 0.5713198333978653, "loss": 0.46125791370868685, "time": 1.5050816774368285, "epoch": 561, "memory": 35394, "step": 175380}
{"lr": 0.00034386374202362145, "data_time": 0.08909890651702881, "grad_norm": 0.6207251787185669, "loss": 0.46434887647628786, "time": 1.341056799888611, "epoch": 561, "memory": 35394, "step": 175480}
{"lr": 0.0003430184078891595, "data_time": 0.0027414560317993164, "grad_norm": 0.5199397206306458, "loss": 0.4518302589654922, "time": 1.5616890430450439, "epoch": 561, "memory": 35394, "step": 175580}
{"lr": 0.00034206409103401946, "data_time": 0.35805277824401854, "grad_norm": 0.6394208461046219, "loss": 0.4630968481302261, "time": 1.5290456533432006, "epoch": 562, "memory": 35394, "step": 175693}
{"lr": 0.00034122037034155174, "data_time": 0.10601725578308105, "grad_norm": 0.5421143054962159, "loss": 0.4625730574131012, "time": 1.5105294227600097, "epoch": 562, "memory": 35394, "step": 175793}
{"lr": 0.00034037740932075524, "data_time": 0.00229034423828125, "grad_norm": 0.5762126266956329, "loss": 0.45573616325855254, "time": 1.5489685773849486, "epoch": 562, "memory": 35394, "step": 175893}
{"lr": 0.00033942577934666676, "data_time": 0.3866418838500977, "grad_norm": 0.6451096653938293, "loss": 0.45781423449516295, "time": 1.5568061590194702, "epoch": 563, "memory": 35394, "step": 176006}
{"lr": 0.00033858444107784277, "data_time": 0.06392428874969483, "grad_norm": 0.5696883827447892, "loss": 0.4608901411294937, "time": 1.403722310066223, "epoch": 563, "memory": 35394, "step": 176106}
{"lr": 0.0003377438668454012, "data_time": 0.002506589889526367, "grad_norm": 0.6256498605012893, "loss": 0.4587269753217697, "time": 1.570726752281189, "epoch": 563, "memory": 35394, "step": 176206}
{"lr": 0.00033679493918990365, "data_time": 0.5256342649459839, "grad_norm": 0.5893540859222413, "loss": 0.4561788529157639, "time": 1.539394211769104, "epoch": 564, "memory": 35394, "step": 176319}
{"lr": 0.0003359559969930868, "data_time": 0.030516719818115233, "grad_norm": 0.6163710713386535, "loss": 0.45397086441516876, "time": 1.4928903818130492, "epoch": 564, "memory": 35394, "step": 176419}
{"lr": 0.00033511782318496964, "data_time": 0.0027689218521118166, "grad_norm": 0.6132803976535797, "loss": 0.45886786580085753, "time": 1.5452134609222412, "epoch": 564, "memory": 35394, "step": 176519}
{"lr": 0.00033417161324176414, "data_time": 0.46644816398620603, "grad_norm": 0.5813918560743332, "loss": 0.4625336706638336, "time": 1.5378334522247314, "epoch": 565, "memory": 35394, "step": 176632}
{"lr": 0.00033333508072644894, "data_time": 0.2620327711105347, "grad_norm": 0.6141989886760711, "loss": 0.4595394551753998, "time": 1.5281564474105835, "epoch": 565, "memory": 35394, "step": 176732}
{"lr": 0.0003324993209396864, "data_time": 0.002371525764465332, "grad_norm": 0.612521693110466, "loss": 0.4629116922616959, "time": 1.5121979475021363, "epoch": 565, "memory": 35394, "step": 176832}
{"lr": 0.0003315558440583866, "data_time": 0.5225757598876953, "grad_norm": 0.5553156495094299, "loss": 0.45752870440483095, "time": 1.567020344734192, "epoch": 566, "memory": 35394, "step": 176945}
{"lr": 0.0003307217347949766, "data_time": 0.10402169227600097, "grad_norm": 0.5398392140865326, "loss": 0.46512375473976136, "time": 1.585206890106201, "epoch": 566, "memory": 35394, "step": 177045}
{"lr": 0.0003298884025874365, "data_time": 0.002797245979309082, "grad_norm": 0.6014207899570465, "loss": 0.4617195248603821, "time": 1.5410498142242433, "epoch": 566, "memory": 35394, "step": 177145}
{"lr": 0.0003289476740733205, "data_time": 0.39168481826782225, "grad_norm": 0.6113144874572753, "loss": 0.45711030066013336, "time": 1.5183342933654784, "epoch": 567, "memory": 35394, "step": 177258}
{"lr": 0.0003281160015929095, "data_time": 0.0019815444946289064, "grad_norm": 0.5562660455703735, "loss": 0.45939464271068575, "time": 1.5329211950302124, "epoch": 567, "memory": 35394, "step": 177358}
{"lr": 0.0003272851104830803, "data_time": 0.00270233154296875, "grad_norm": 0.5936060041189194, "loss": 0.4648050993680954, "time": 1.5211599588394165, "epoch": 567, "memory": 35394, "step": 177458}
{"lr": 0.0003263471455968412, "data_time": 0.5358163833618164, "grad_norm": 0.5918124496936799, "loss": 0.4643809348344803, "time": 1.54934983253479, "epoch": 568, "memory": 35394, "step": 177571}
{"lr": 0.0003255179233909935, "data_time": 0.21209847927093506, "grad_norm": 0.5784164518117905, "loss": 0.45785254836082456, "time": 1.5470351457595826, "epoch": 568, "memory": 35394, "step": 177671}
{"lr": 0.0003246894868577623, "data_time": 0.002183866500854492, "grad_norm": 0.6435190856456756, "loss": 0.4608622282743454, "time": 1.5224467754364013, "epoch": 568, "memory": 35394, "step": 177771}
{"lr": 0.0003237543008152611, "data_time": 0.3680696725845337, "grad_norm": 0.644582548737526, "loss": 0.4586130380630493, "time": 1.5401539564132691, "epoch": 569, "memory": 35394, "step": 177884}
{"lr": 0.00032292754233579116, "data_time": 0.01531069278717041, "grad_norm": 0.5954945355653762, "loss": 0.4572967678308487, "time": 1.5505828857421875, "epoch": 569, "memory": 35394, "step": 177984}
{"lr": 0.0003221015738182276, "data_time": 0.003292346000671387, "grad_norm": 0.5774082660675048, "loss": 0.45653444826602935, "time": 1.552845048904419, "epoch": 569, "memory": 35394, "step": 178084}
{"lr": 0.0003211691817902465, "data_time": 0.35995280742645264, "grad_norm": 0.6344745397567749, "loss": 0.4580988198518753, "time": 1.391470718383789, "epoch": 570, "memory": 35394, "step": 178197}
{"lr": 0.0003203449004490023, "data_time": 0.17038364410400392, "grad_norm": 0.6199406057596206, "loss": 0.45953287184238434, "time": 1.5254599809646607, "epoch": 570, "memory": 35394, "step": 178297}
{"lr": 0.00031952141334613983, "data_time": 0.07854974269866943, "grad_norm": 0.6464730858802795, "loss": 0.4648177891969681, "time": 1.516037917137146, "epoch": 570, "memory": 35394, "step": 178397}
{"lr": 0.00031859183045813576, "data_time": 0.3805238246917725, "grad_norm": 0.5784495115280152, "loss": 0.4672441244125366, "time": 1.5516149759292603, "epoch": 571, "memory": 35394, "step": 178510}
{"lr": 0.00031777003962677906, "data_time": 0.0021892070770263674, "grad_norm": 0.6633891880512237, "loss": 0.4596389830112457, "time": 1.4942271947860717, "epoch": 571, "memory": 35394, "step": 178610}
{"lr": 0.0003169490472973966, "data_time": 0.002522730827331543, "grad_norm": 0.7130104959011078, "loss": 0.45548138320446013, "time": 1.5553608655929565, "epoch": 571, "memory": 35394, "step": 178710}
{"lr": 0.00031602228862925575, "data_time": 0.26894686222076414, "grad_norm": 0.6314432680606842, "loss": 0.45432065725326537, "time": 1.5289069890975953, "epoch": 572, "memory": 35394, "step": 178823}
{"lr": 0.000315203001639049, "data_time": 0.0021721839904785155, "grad_norm": 0.6379350244998931, "loss": 0.46224523782730104, "time": 1.358170223236084, "epoch": 572, "memory": 35394, "step": 178923}
{"lr": 0.00031438451740145335, "data_time": 0.0021869182586669923, "grad_norm": 0.5770564049482345, "loss": 0.4540161579847336, "time": 1.5051328659057617, "epoch": 572, "memory": 35394, "step": 179023}
{"lr": 0.00031346059798724755, "data_time": 0.5467628955841064, "grad_norm": 0.6098365962505341, "loss": 0.46088475584983823, "time": 1.5065413475036622, "epoch": 573, "memory": 35394, "step": 179136}
{"lr": 0.0003126438281288332, "data_time": 0.360413122177124, "grad_norm": 0.5839711546897888, "loss": 0.45469839572906495, "time": 1.5413556814193725, "epoch": 573, "memory": 35394, "step": 179236}
{"lr": 0.0003118278652606463, "data_time": 0.002823185920715332, "grad_norm": 0.6733098566532135, "loss": 0.4590130239725113, "time": 1.50746693611145, "epoch": 573, "memory": 35394, "step": 179336}
{"lr": 0.00031090680008838797, "data_time": 0.2931325674057007, "grad_norm": 0.5648280590772629, "loss": 0.4556719183921814, "time": 1.5040266513824463, "epoch": 574, "memory": 35394, "step": 179449}
{"lr": 0.00031009256061157706, "data_time": 0.0019988298416137697, "grad_norm": 0.5679540544748306, "loss": 0.4603580325841904, "time": 1.418637490272522, "epoch": 574, "memory": 35394, "step": 179549}
{"lr": 0.0003092791323495175, "data_time": 0.002448630332946777, "grad_norm": 0.5790441542863846, "loss": 0.4570068895816803, "time": 1.5296305418014526, "epoch": 574, "memory": 35394, "step": 179649}
{"lr": 0.00030836093636091695, "data_time": 0.5556326389312745, "grad_norm": 0.6055224120616913, "loss": 0.46162081956863404, "time": 1.5197458267211914, "epoch": 575, "memory": 35394, "step": 179762}
{"lr": 0.0003075492404744712, "data_time": 0.19373812675476074, "grad_norm": 0.632685872912407, "loss": 0.458412367105484, "time": 1.502940583229065, "epoch": 575, "memory": 35394, "step": 179862}
{"lr": 0.00030673836001414125, "data_time": 0.02974114418029785, "grad_norm": 0.6371413469314575, "loss": 0.4589307218790054, "time": 1.520861840248108, "epoch": 575, "memory": 35394, "step": 179962}
{"lr": 0.0003058230481043646, "data_time": 0.3148239850997925, "grad_norm": 0.6542795747518539, "loss": 0.45538751482963563, "time": 1.5227519273757935, "epoch": 576, "memory": 35394, "step": 180075}
{"lr": 0.0003050139089757829, "data_time": 0.002160382270812988, "grad_norm": 0.6137961208820343, "loss": 0.4600616157054901, "time": 1.5215247392654419, "epoch": 576, "memory": 35394, "step": 180175}
{"lr": 0.00030420558947145336, "data_time": 0.0028418302536010742, "grad_norm": 0.5501758962869644, "loss": 0.45870664417743684, "time": 1.4945824861526489, "epoch": 576, "memory": 35394, "step": 180275}
{"lr": 0.00030329317648888015, "data_time": 0.3798897981643677, "grad_norm": 0.6029308289289474, "loss": 0.46218069195747374, "time": 1.523914909362793, "epoch": 577, "memory": 35394, "step": 180388}
{"lr": 0.00030248660724418596, "data_time": 0.08705554008483887, "grad_norm": 0.5890414774417877, "loss": 0.4521443575620651, "time": 1.506126046180725, "epoch": 577, "memory": 35394, "step": 180488}
{"lr": 0.00030168086180858383, "data_time": 0.002718615531921387, "grad_norm": 0.6519883334636688, "loss": 0.4555839240550995, "time": 1.5049314737319945, "epoch": 577, "memory": 35394, "step": 180588}
{"lr": 0.00030077136255456663, "data_time": 0.49267704486846925, "grad_norm": 0.5514782994985581, "loss": 0.4582207381725311, "time": 1.5087838172912598, "epoch": 578, "memory": 35394, "step": 180701}
{"lr": 0.0002999673762780926, "data_time": 0.13598158359527587, "grad_norm": 0.6437435269355773, "loss": 0.45571390688419344, "time": 1.4881382942199708, "epoch": 578, "memory": 35394, "step": 180801}
{"lr": 0.00029916421798218773, "data_time": 0.0022679567337036133, "grad_norm": 0.6209228903055191, "loss": 0.4585267066955566, "time": 1.5071892261505127, "epoch": 578, "memory": 35394, "step": 180901}
{"lr": 0.00029825764721081214, "data_time": 0.26622800827026366, "grad_norm": 0.6459212362766266, "loss": 0.4539849817752838, "time": 1.5095788478851317, "epoch": 579, "memory": 35394, "step": 181014}
{"lr": 0.0002974562569449912, "data_time": 0.0023839473724365234, "grad_norm": 0.6759532690048218, "loss": 0.4615833908319473, "time": 1.4936110496520996, "epoch": 579, "memory": 35394, "step": 181114}
{"lr": 0.00029665569881778434, "data_time": 0.0025753021240234376, "grad_norm": 0.6201076894998551, "loss": 0.45732966959476473, "time": 1.4784045696258545, "epoch": 579, "memory": 35394, "step": 181214}
{"lr": 0.00029575207123563017, "data_time": 0.32984654903411864, "grad_norm": 0.6395511329174042, "loss": 0.45871190130710604, "time": 1.5132098197937012, "epoch": 580, "memory": 35394, "step": 181327}
{"lr": 0.00029495328998078074, "data_time": 0.0020886898040771485, "grad_norm": 0.6464926600456238, "loss": 0.4537978321313858, "time": 1.459473156929016, "epoch": 580, "memory": 35394, "step": 181427}
{"lr": 0.00029415534500909155, "data_time": 0.0025607585906982423, "grad_norm": 0.5769134014844894, "loss": 0.45611385107040403, "time": 1.568917441368103, "epoch": 580, "memory": 35394, "step": 181527}
{"lr": 0.00029325467527499234, "data_time": 0.10886940956115723, "grad_norm": 0.5903304666280746, "loss": 0.46247372925281527, "time": 1.2634127140045166, "epoch": 581, "memory": 35394, "step": 181640}
{"lr": 0.00029245851598911027, "data_time": 0.0125091552734375, "grad_norm": 0.6939304649829865, "loss": 0.4612402617931366, "time": 1.5107616901397705, "epoch": 581, "memory": 35394, "step": 181740}
{"lr": 0.0002916631971173677, "data_time": 0.002915477752685547, "grad_norm": 0.5823019802570343, "loss": 0.4593788325786591, "time": 1.5147096157073974, "epoch": 581, "memory": 35394, "step": 181840}
{"lr": 0.00029076549984217515, "data_time": 0.5104311227798461, "grad_norm": 0.6358977615833282, "loss": 0.45447962880134585, "time": 1.5091692209243774, "epoch": 582, "memory": 35394, "step": 181953}
{"lr": 0.00028997197544072215, "data_time": 0.12187659740447998, "grad_norm": 0.6077434301376343, "loss": 0.4565938085317612, "time": 1.508384108543396, "epoch": 582, "memory": 35394, "step": 182053}
{"lr": 0.00028917929557075383, "data_time": 0.0024844884872436525, "grad_norm": 0.6268588244915009, "loss": 0.4567635267972946, "time": 1.5184007644653321, "epoch": 582, "memory": 35394, "step": 182153}
{"lr": 0.00028828458531709913, "data_time": 0.5650445461273194, "grad_norm": 0.5950996249914169, "loss": 0.4635088860988617, "time": 1.4988638162612915, "epoch": 583, "memory": 35394, "step": 182266}
{"lr": 0.00028749370867279237, "data_time": 0.22268979549407958, "grad_norm": 0.5677439153194428, "loss": 0.46020137667655947, "time": 1.3229549407958985, "epoch": 583, "memory": 35394, "step": 182366}
{"lr": 0.00028670368066361555, "data_time": 0.16640756130218506, "grad_norm": 0.5794206947088242, "loss": 0.4592739224433899, "time": 1.5294212102890015, "epoch": 583, "memory": 35394, "step": 182466}
{"lr": 0.0002858119719456742, "data_time": 0.4050585269927979, "grad_norm": 0.5678123652935028, "loss": 0.46100724041461943, "time": 1.52453556060791, "epoch": 584, "memory": 35394, "step": 182579}
{"lr": 0.0002850237558882784, "data_time": 0.16457951068878174, "grad_norm": 0.5914550215005875, "loss": 0.46070320904254913, "time": 1.5083398342132568, "epoch": 584, "memory": 35394, "step": 182679}
{"lr": 0.00028423639255589176, "data_time": 0.002788066864013672, "grad_norm": 0.563902947306633, "loss": 0.452350115776062, "time": 1.4969116926193238, "epoch": 584, "memory": 35394, "step": 182779}
{"lr": 0.000283347699839148, "data_time": 0.5448665380477905, "grad_norm": 0.6510003983974457, "loss": 0.4571573406457901, "time": 1.5180675268173218, "epoch": 585, "memory": 35394, "step": 182892}
{"lr": 0.0002825621571552678, "data_time": 0.19748916625976562, "grad_norm": 0.6162049233913421, "loss": 0.4588499903678894, "time": 1.4251579999923707, "epoch": 585, "memory": 35394, "step": 182992}
{"lr": 0.0002817774712724433, "data_time": 0.0028533220291137697, "grad_norm": 0.6870913207530975, "loss": 0.46475171744823457, "time": 1.4997116804122925, "epoch": 585, "memory": 35394, "step": 183092}
{"lr": 0.0002808918089734538, "data_time": 0.46111159324645995, "grad_norm": 0.5985116988420487, "loss": 0.45147305727005005, "time": 1.4992192029953002, "epoch": 586, "memory": 35394, "step": 183205}
{"lr": 0.0002801089524063258, "data_time": 0.2555497169494629, "grad_norm": 0.6398592442274094, "loss": 0.45847939848899844, "time": 1.5678959846496583, "epoch": 586, "memory": 35394, "step": 183305}
{"lr": 0.0002793269567023996, "data_time": 0.002642965316772461, "grad_norm": 0.6646841377019882, "loss": 0.4571583837270737, "time": 1.5287152051925659, "epoch": 586, "memory": 35394, "step": 183405}
{"lr": 0.00027844433918856285, "data_time": 0.5547163486480713, "grad_norm": 0.668271279335022, "loss": 0.45904468595981596, "time": 1.5951738119125367, "epoch": 587, "memory": 35394, "step": 183518}
{"lr": 0.0002776641814378483, "data_time": 0.07875978946685791, "grad_norm": 0.6555132627487182, "loss": 0.4622391819953918, "time": 1.5219807624816895, "epoch": 587, "memory": 35394, "step": 183618}
{"lr": 0.0002768848885985173, "data_time": 0.0025931358337402343, "grad_norm": 0.6409941732883453, "loss": 0.46047383546829224, "time": 1.2741183280944823, "epoch": 587, "memory": 35394, "step": 183718}
{"lr": 0.00027600533018783775, "data_time": 0.3302066087722778, "grad_norm": 0.5538578480482101, "loss": 0.45888687670230865, "time": 1.5367907524108886, "epoch": 588, "memory": 35394, "step": 183831}
{"lr": 0.00027522788390941773, "data_time": 0.001933741569519043, "grad_norm": 0.6012358337640762, "loss": 0.44896858036518095, "time": 1.523933506011963, "epoch": 588, "memory": 35394, "step": 183931}
{"lr": 0.0002744513065765323, "data_time": 0.002678060531616211, "grad_norm": 0.6711902916431427, "loss": 0.45521164536476133, "time": 1.5173811674118043, "epoch": 588, "memory": 35394, "step": 184031}
{"lr": 0.0002735748215373891, "data_time": 0.44211080074310305, "grad_norm": 0.6466831982135772, "loss": 0.45507856011390685, "time": 1.5031129121780396, "epoch": 589, "memory": 35394, "step": 184144}
{"lr": 0.00027280009934315823, "data_time": 0.16890947818756102, "grad_norm": 0.607656842470169, "loss": 0.46284609138965604, "time": 1.513748860359192, "epoch": 589, "memory": 35394, "step": 184244}
{"lr": 0.0002720262501145171, "data_time": 0.002879214286804199, "grad_norm": 0.6233344972133636, "loss": 0.45718251168727875, "time": 1.6141315698623657, "epoch": 589, "memory": 35394, "step": 184344}
{"lr": 0.0002711528526654322, "data_time": 0.3349339723587036, "grad_norm": 0.6424358457326889, "loss": 0.4624518543481827, "time": 1.5456143379211427, "epoch": 590, "memory": 35394, "step": 184457}
{"lr": 0.0002703808671230957, "data_time": 0.04722228050231934, "grad_norm": 0.6099741280078887, "loss": 0.4555675745010376, "time": 1.5622368097305297, "epoch": 590, "memory": 35394, "step": 184557}
{"lr": 0.00026960975855224085, "data_time": 0.0025655746459960936, "grad_norm": 0.6598614484071732, "loss": 0.4636788100004196, "time": 1.495130705833435, "epoch": 590, "memory": 35394, "step": 184657}
{"lr": 0.00026873946286164896, "data_time": 0.5712636232376098, "grad_norm": 0.628821462392807, "loss": 0.4577605456113815, "time": 1.5072445392608642, "epoch": 591, "memory": 35394, "step": 184770}
{"lr": 0.0002679702264945159, "data_time": 0.2970930576324463, "grad_norm": 0.5926956623792649, "loss": 0.4637341290712357, "time": 1.4691051483154296, "epoch": 591, "memory": 35394, "step": 184870}
{"lr": 0.0002672018710905311, "data_time": 0.0026779413223266602, "grad_norm": 0.6320198714733124, "loss": 0.45445617735385896, "time": 1.4282550811767578, "epoch": 591, "memory": 35394, "step": 184970}
{"lr": 0.00026633469127654926, "data_time": 0.09186642169952393, "grad_norm": 0.6113971680402756, "loss": 0.4543684244155884, "time": 1.3083587646484376, "epoch": 592, "memory": 35394, "step": 185083}
{"lr": 0.0002655682165633314, "data_time": 0.0019838571548461913, "grad_norm": 0.6281583189964295, "loss": 0.46218626797199247, "time": 1.498055100440979, "epoch": 592, "memory": 35394, "step": 185183}
{"lr": 0.0002648026267906378, "data_time": 0.002102494239807129, "grad_norm": 0.5861251413822174, "loss": 0.4613835453987122, "time": 1.5373999357223511, "epoch": 592, "memory": 35394, "step": 185283}
{"lr": 0.0002639385769208373, "data_time": 0.31921279430389404, "grad_norm": 0.616105392575264, "loss": 0.4604838967323303, "time": 1.5031420707702636, "epoch": 593, "memory": 35394, "step": 185396}
{"lr": 0.00026317487629544693, "data_time": 0.002105236053466797, "grad_norm": 0.5997410088777542, "loss": 0.4578433454036713, "time": 1.5288749933242798, "epoch": 593, "memory": 35394, "step": 185496}
{"lr": 0.000262412064573601, "data_time": 0.00240473747253418, "grad_norm": 0.6241365432739258, "loss": 0.44994083046913147, "time": 1.4944089651107788, "epoch": 593, "memory": 35394, "step": 185596}
{"lr": 0.0002615511586647779, "data_time": 0.39830167293548585, "grad_norm": 0.6143219769001007, "loss": 0.4604823410511017, "time": 1.4540311336517333, "epoch": 594, "memory": 35394, "step": 185709}
{"lr": 0.00026079024451612447, "data_time": 0.053545784950256345, "grad_norm": 0.5876426815986633, "loss": 0.46495976448059084, "time": 1.4350520849227906, "epoch": 594, "memory": 35394, "step": 185809}
{"lr": 0.0002600302232196156, "data_time": 0.0033251762390136717, "grad_norm": 0.6228360772132874, "loss": 0.4471079856157303, "time": 1.5296710014343262, "epoch": 594, "memory": 35394, "step": 185909}
{"lr": 0.0002591724752375645, "data_time": 0.47181684970855714, "grad_norm": 0.6322738766670227, "loss": 0.46287142038345336, "time": 1.555874800682068, "epoch": 595, "memory": 35394, "step": 186022}
{"lr": 0.00025841435990935557, "data_time": 0.1648786783218384, "grad_norm": 0.6039197444915771, "loss": 0.45917162001132966, "time": 1.483588981628418, "epoch": 595, "memory": 35394, "step": 186122}
{"lr": 0.00025765714136740726, "data_time": 0.0026238679885864256, "grad_norm": 0.5899613618850708, "loss": 0.4590456455945969, "time": 1.506100630760193, "epoch": 595, "memory": 35394, "step": 186222}
{"lr": 0.0002568025652266935, "data_time": 0.2655627965927124, "grad_norm": 0.5654799789190292, "loss": 0.4523042529821396, "time": 1.528308916091919, "epoch": 596, "memory": 35394, "step": 186335}
{"lr": 0.00025604726101723373, "data_time": 0.0019296884536743164, "grad_norm": 0.619758203625679, "loss": 0.4616017580032349, "time": 1.5087173223495483, "epoch": 596, "memory": 35394, "step": 186435}
{"lr": 0.0002552928575136017, "data_time": 0.00258028507232666, "grad_norm": 0.638546884059906, "loss": 0.4527915745973587, "time": 1.5216413021087647, "epoch": 596, "memory": 35394, "step": 186535}
{"lr": 0.00025444146707733726, "data_time": 0.27363085746765137, "grad_norm": 0.6278662323951721, "loss": 0.45713680386543276, "time": 1.5543524265289306, "epoch": 597, "memory": 35394, "step": 186648}
{"lr": 0.00025368898623932747, "data_time": 0.002114605903625488, "grad_norm": 0.6013423055410385, "loss": 0.4557835698127747, "time": 1.4747138500213623, "epoch": 597, "memory": 35394, "step": 186748}
{"lr": 0.0002529374100121024, "data_time": 0.0026690483093261717, "grad_norm": 0.5731219559907913, "loss": 0.45821561813354494, "time": 1.533633041381836, "epoch": 597, "memory": 35394, "step": 186848}
{"lr": 0.00025208921909171953, "data_time": 0.4478520154953003, "grad_norm": 0.6568421185016632, "loss": 0.4580923318862915, "time": 1.528026270866394, "epoch": 598, "memory": 35394, "step": 186961}
{"lr": 0.0002513395738320606, "data_time": 0.26175286769866946, "grad_norm": 0.650649556517601, "loss": 0.45899978280067444, "time": 1.4685955286026, "epoch": 598, "memory": 35394, "step": 187061}
{"lr": 0.0002505908370734681, "data_time": 0.0026363372802734376, "grad_norm": 0.6296666085720062, "loss": 0.4573896795511246, "time": 1.5162922859191894, "epoch": 598, "memory": 35394, "step": 187161}
{"lr": 0.0002497458594284956, "data_time": 0.2586252450942993, "grad_norm": 0.6757417410612107, "loss": 0.4540745109319687, "time": 1.479969048500061, "epoch": 599, "memory": 35394, "step": 187274}
{"lr": 0.000248999061908088, "data_time": 0.0020610570907592775, "grad_norm": 0.697472894191742, "loss": 0.4542015314102173, "time": 1.485852837562561, "epoch": 599, "memory": 35394, "step": 187374}
{"lr": 0.00024825317676429145, "data_time": 0.0038377523422241213, "grad_norm": 0.6698482573032379, "loss": 0.4552605152130127, "time": 1.515709686279297, "epoch": 599, "memory": 35394, "step": 187474}
{"lr": 0.0002474114261021314, "data_time": 0.3928283929824829, "grad_norm": 0.6305655300617218, "loss": 0.46108474135398864, "time": 1.4972015142440795, "epoch": 600, "memory": 35394, "step": 187587}
{"lr": 0.00024666748843568, "data_time": 0.06877176761627198, "grad_norm": 0.6477284818887711, "loss": 0.4578191637992859, "time": 1.4633874654769898, "epoch": 600, "memory": 35394, "step": 187687}
{"lr": 0.0002459244670065821, "data_time": 0.0027596235275268556, "grad_norm": 0.5723071396350861, "loss": 0.45687063634395597, "time": 1.4955358982086182, "epoch": 600, "memory": 35394, "step": 187787}
{"lr": 0.0002450859569822897, "data_time": 0.48739235401153563, "grad_norm": 0.7087085545063019, "loss": 0.44877988696098325, "time": 1.5360831022262573, "epoch": 601, "memory": 35394, "step": 187900}
{"lr": 0.0002443448912381057, "data_time": 0.17741870880126953, "grad_norm": 0.6929840743541718, "loss": 0.4564521163702011, "time": 1.467892336845398, "epoch": 601, "memory": 35394, "step": 188000}
{"lr": 0.00024360474557715453, "data_time": 0.003843545913696289, "grad_norm": 0.5705623418092728, "loss": 0.45397418141365053, "time": 1.4974747180938721, "epoch": 601, "memory": 35394, "step": 188100}
{"lr": 0.00024276948979321328, "data_time": 0.39701261520385744, "grad_norm": 0.6349877297878266, "loss": 0.4602352797985077, "time": 1.5001112222671509, "epoch": 602, "memory": 35394, "step": 188213}
{"lr": 0.00024203130799301838, "data_time": 0.05726170539855957, "grad_norm": 0.6027574062347412, "loss": 0.45998178124427797, "time": 1.4767752408981323, "epoch": 602, "memory": 35394, "step": 188313}
{"lr": 0.00024129405010701126, "data_time": 0.002550220489501953, "grad_norm": 0.6465689897537231, "loss": 0.4549914926290512, "time": 1.4968480348587037, "epoch": 602, "memory": 35394, "step": 188413}
{"lr": 0.00024046206211311352, "data_time": 0.4033778667449951, "grad_norm": 0.609531557559967, "loss": 0.4592635840177536, "time": 1.5229491233825683, "epoch": 603, "memory": 35394, "step": 188526}
{"lr": 0.00023972677623184622, "data_time": 0.06345150470733643, "grad_norm": 0.577638852596283, "loss": 0.45837334394454954, "time": 1.5567786693572998, "epoch": 603, "memory": 35394, "step": 188626}
{"lr": 0.0002389924180807328, "data_time": 0.002758502960205078, "grad_norm": 0.6013017207384109, "loss": 0.45996792912483214, "time": 1.5058884859085082, "epoch": 603, "memory": 35394, "step": 188726}
{"lr": 0.00023816371137356046, "data_time": 0.5394819498062133, "grad_norm": 0.6536453902721405, "loss": 0.45621239840984346, "time": 1.508218765258789, "epoch": 604, "memory": 35394, "step": 188839}
{"lr": 0.0002374313333391815, "data_time": 0.36694023609161375, "grad_norm": 0.6262362629175187, "loss": 0.46332067549228667, "time": 1.492696261405945, "epoch": 604, "memory": 35394, "step": 188939}
{"lr": 0.0002366998868358723, "data_time": 0.0026566028594970704, "grad_norm": 0.574428653717041, "loss": 0.45642968118190763, "time": 1.5355270624160766, "epoch": 604, "memory": 35394, "step": 189039}
{"lr": 0.00023587447485887752, "data_time": 0.4116081953048706, "grad_norm": 0.6589062988758088, "loss": 0.45578634440898896, "time": 1.5145163059234619, "epoch": 605, "memory": 35394, "step": 189152}
{"lr": 0.0002351450165521753, "data_time": 0.00209348201751709, "grad_norm": 0.6654430657625199, "loss": 0.4549575626850128, "time": 1.4928062915802003, "epoch": 605, "memory": 35394, "step": 189252}
{"lr": 0.00023441649356234688, "data_time": 0.002686810493469238, "grad_norm": 0.5330469459295273, "loss": 0.4555890291929245, "time": 1.5556125402450562, "epoch": 605, "memory": 35394, "step": 189352}
{"lr": 0.00023359438970553385, "data_time": 0.33723232746124265, "grad_norm": 0.6061002373695373, "loss": 0.45660209357738496, "time": 1.5452151536941527, "epoch": 606, "memory": 35394, "step": 189465}
{"lr": 0.00023286786295993186, "data_time": 0.17830443382263184, "grad_norm": 0.6377967029809952, "loss": 0.456454798579216, "time": 1.5288408279418946, "epoch": 606, "memory": 35394, "step": 189565}
{"lr": 0.00023214227530183584, "data_time": 0.0026555299758911134, "grad_norm": 0.635612279176712, "loss": 0.464698326587677, "time": 1.541888451576233, "epoch": 606, "memory": 35394, "step": 189665}
{"lr": 0.00023132349290154434, "data_time": 0.3570894956588745, "grad_norm": 0.5913010954856872, "loss": 0.45619488060474395, "time": 1.5057217121124267, "epoch": 607, "memory": 35394, "step": 189778}
{"lr": 0.00023059990950291063, "data_time": 0.0024326086044311524, "grad_norm": 0.6047623485326767, "loss": 0.4595413774251938, "time": 1.1826470375061036, "epoch": 607, "memory": 35394, "step": 189878}
{"lr": 0.00022987726894718087, "data_time": 0.003149533271789551, "grad_norm": 0.6497779726982117, "loss": 0.4617184340953827, "time": 1.5283560037612915, "epoch": 607, "memory": 35394, "step": 189978}
{"lr": 0.00022906182128586868, "data_time": 0.4785326957702637, "grad_norm": 0.6446343719959259, "loss": 0.45600537955760956, "time": 1.4748505353927612, "epoch": 608, "memory": 35394, "step": 190091}
{"lr": 0.00022834119297232263, "data_time": 0.04065999984741211, "grad_norm": 0.6338074564933777, "loss": 0.4574990451335907, "time": 1.5051612377166748, "epoch": 608, "memory": 35394, "step": 190191}
{"lr": 0.00022762151124178391, "data_time": 0.002710390090942383, "grad_norm": 0.587801918387413, "loss": 0.4529467225074768, "time": 1.4947357654571534, "epoch": 608, "memory": 35394, "step": 190291}
{"lr": 0.00022680941154781212, "data_time": 0.2001004695892334, "grad_norm": 0.5465520083904266, "loss": 0.4559942364692688, "time": 1.5219857215881347, "epoch": 609, "memory": 35394, "step": 190404}
{"lr": 0.00022609175000953645, "data_time": 0.002085757255554199, "grad_norm": 0.6152972996234893, "loss": 0.45692693293094633, "time": 1.554770016670227, "epoch": 609, "memory": 35394, "step": 190504}
{"lr": 0.0002253750387790148, "data_time": 0.0026708126068115236, "grad_norm": 0.6261006951332092, "loss": 0.45906947553157806, "time": 1.5209170579910278, "epoch": 609, "memory": 35394, "step": 190604}
{"lr": 0.00022456630022643333, "data_time": 0.33322241306304934, "grad_norm": 0.6480631351470947, "loss": 0.4611804932355881, "time": 1.5338581562042237, "epoch": 610, "memory": 35394, "step": 190717}
{"lr": 0.00022385161710548245, "data_time": 0.0023883581161499023, "grad_norm": 0.6783379197120667, "loss": 0.45472275614738467, "time": 1.5448192596435546, "epoch": 610, "memory": 35394, "step": 190817}
{"lr": 0.00022313788800161635, "data_time": 0.0025949478149414062, "grad_norm": 0.611807894706726, "loss": 0.46027095019817355, "time": 1.563496470451355, "epoch": 610, "memory": 35394, "step": 190917}
{"lr": 0.00022233252370994992, "data_time": 0.2385403633117676, "grad_norm": 0.6834794729948044, "loss": 0.460599422454834, "time": 1.5235373497009277, "epoch": 611, "memory": 35394, "step": 191030}
{"lr": 0.00022162083060006167, "data_time": 0.002125048637390137, "grad_norm": 0.5907806128263473, "loss": 0.45781307816505434, "time": 1.507970428466797, "epoch": 611, "memory": 35394, "step": 191130}
{"lr": 0.0002209100952011128, "data_time": 0.002105379104614258, "grad_norm": 0.6457323789596557, "loss": 0.46202537417411804, "time": 1.5281020641326903, "epoch": 611, "memory": 35394, "step": 191230}
{"lr": 0.00022010811823514794, "data_time": 0.2936784029006958, "grad_norm": 0.5885587155818939, "loss": 0.4514928996562958, "time": 1.5108741760253905, "epoch": 612, "memory": 35394, "step": 191343}
{"lr": 0.00021939942668155562, "data_time": 0.0020190954208374025, "grad_norm": 0.6581990897655488, "loss": 0.46160043478012086, "time": 1.5267096519470216, "epoch": 612, "memory": 35394, "step": 191443}
{"lr": 0.00021869169651722112, "data_time": 0.0023279905319213865, "grad_norm": 0.6027616500854492, "loss": 0.46483172178268434, "time": 1.4709349393844604, "epoch": 612, "memory": 35394, "step": 191543}
{"lr": 0.00021789311988679438, "data_time": 0.08583247661590576, "grad_norm": 0.5855861067771911, "loss": 0.45095255970954895, "time": 1.5188351392745971, "epoch": 613, "memory": 35394, "step": 191656}
{"lr": 0.00021718744138603922, "data_time": 0.002188587188720703, "grad_norm": 0.5805597096681595, "loss": 0.4608605742454529, "time": 1.5264109134674073, "epoch": 613, "memory": 35394, "step": 191756}
{"lr": 0.00021648272793726512, "data_time": 0.0026703834533691405, "grad_norm": 0.6716535806655883, "loss": 0.4576902359724045, "time": 1.4886403799057006, "epoch": 613, "memory": 35394, "step": 191856}
{"lr": 0.00021568756459705278, "data_time": 0.5093674659729004, "grad_norm": 0.5380606770515441, "loss": 0.4610195368528366, "time": 1.484993815422058, "epoch": 614, "memory": 35394, "step": 191969}
{"lr": 0.0002149849105967983, "data_time": 0.002022576332092285, "grad_norm": 0.5885387659072876, "loss": 0.45529280602931976, "time": 1.4593903303146363, "epoch": 614, "memory": 35394, "step": 192069}
{"lr": 0.0002142832252955919, "data_time": 0.0021483898162841797, "grad_norm": 0.633484160900116, "loss": 0.4530597537755966, "time": 1.4901451826095582, "epoch": 614, "memory": 35394, "step": 192169}
{"lr": 0.00021349148814489818, "data_time": 0.3560556650161743, "grad_norm": 0.6044077277183533, "loss": 0.4533493399620056, "time": 1.5147379875183105, "epoch": 615, "memory": 35394, "step": 192282}
{"lr": 0.000212791870043743, "data_time": 0.0019390583038330078, "grad_norm": 0.6214524507522583, "loss": 0.4579335540533066, "time": 1.5611674308776855, "epoch": 615, "memory": 35394, "step": 192382}
{"lr": 0.00021209322427298896, "data_time": 0.0028257369995117188, "grad_norm": 0.6020117223262786, "loss": 0.4560498148202896, "time": 1.4931100845336913, "epoch": 615, "memory": 35394, "step": 192482}
{"lr": 0.00021130492615553876, "data_time": 0.33049118518829346, "grad_norm": 0.5712101966142654, "loss": 0.4639240264892578, "time": 1.4874377965927124, "epoch": 616, "memory": 35394, "step": 192595}
{"lr": 0.00021060835530283288, "data_time": 0.0025454998016357423, "grad_norm": 0.5480147868394851, "loss": 0.4574532568454742, "time": 1.4915982246398927, "epoch": 616, "memory": 35394, "step": 192695}
{"lr": 0.00020991276039610798, "data_time": 0.00290985107421875, "grad_norm": 0.6224471062421799, "loss": 0.4514299988746643, "time": 1.4923190593719482, "epoch": 616, "memory": 35394, "step": 192795}
{"lr": 0.00020912791409983676, "data_time": 0.3868659496307373, "grad_norm": 0.6126601099967957, "loss": 0.4602078408002853, "time": 1.4949119329452514, "epoch": 617, "memory": 35394, "step": 192908}
{"lr": 0.00020843440179549693, "data_time": 0.00223996639251709, "grad_norm": 0.6221904546022415, "loss": 0.45981893241405486, "time": 1.5023788928985595, "epoch": 617, "memory": 35394, "step": 193008}
{"lr": 0.00020774186903688613, "data_time": 0.0029297828674316405, "grad_norm": 0.5985633641481399, "loss": 0.45350282788276675, "time": 1.572430467605591, "epoch": 617, "memory": 35394, "step": 193108}
{"lr": 0.00020696048729373323, "data_time": 0.4227538347244263, "grad_norm": 0.6481456398963928, "loss": 0.4585142433643341, "time": 1.4867748260498046, "epoch": 618, "memory": 35394, "step": 193221}
{"lr": 0.0002062700447880606, "data_time": 0.1664412021636963, "grad_norm": 0.5769077926874161, "loss": 0.4656270623207092, "time": 1.4371999979019165, "epoch": 618, "memory": 35394, "step": 193321}
{"lr": 0.00020558058541197377, "data_time": 0.003353309631347656, "grad_norm": 0.6868395686149598, "loss": 0.45838397145271303, "time": 1.5493321895599366, "epoch": 618, "memory": 35394, "step": 193421}
{"lr": 0.00020480268089767632, "data_time": 0.3792338132858276, "grad_norm": 0.5852908432483673, "loss": 0.4549910604953766, "time": 1.5207332134246827, "epoch": 619, "memory": 35394, "step": 193534}
{"lr": 0.00020411531939117228, "data_time": 0.002137899398803711, "grad_norm": 0.637200552225113, "loss": 0.4551289975643158, "time": 1.478987956047058, "epoch": 619, "memory": 35394, "step": 193634}
{"lr": 0.0002034289445821624, "data_time": 0.0023681163787841798, "grad_norm": 0.628640604019165, "loss": 0.46104109287261963, "time": 1.5334808826446533, "epoch": 619, "memory": 35394, "step": 193734}
{"lr": 0.00020265452991604846, "data_time": 0.40376944541931153, "grad_norm": 0.7104717910289764, "loss": 0.4623786687850952, "time": 1.5113038301467896, "epoch": 620, "memory": 35394, "step": 193847}
{"lr": 0.00020197026055923393, "data_time": 0.0020284414291381835, "grad_norm": 0.5715304344892502, "loss": 0.45988903045654295, "time": 1.3306589365005492, "epoch": 620, "memory": 35394, "step": 193947}
{"lr": 0.00020128698145181593, "data_time": 0.002798128128051758, "grad_norm": 0.6127889394760132, "loss": 0.452756741642952, "time": 1.4833804845809937, "epoch": 620, "memory": 35394, "step": 194047}
{"lr": 0.00020051606919660141, "data_time": 0.32708451747894285, "grad_norm": 0.6364431381225586, "loss": 0.4596168607473373, "time": 1.5140002727508546, "epoch": 621, "memory": 35394, "step": 194160}
{"lr": 0.00019983490308983582, "data_time": 0.0028450727462768556, "grad_norm": 0.6329334616661072, "loss": 0.46215797364711764, "time": 1.4917315244674683, "epoch": 621, "memory": 35394, "step": 194260}
{"lr": 0.00019915473076830562, "data_time": 0.0028400421142578125, "grad_norm": 0.6148299813270569, "loss": 0.4565796315670013, "time": 1.4921648740768432, "epoch": 621, "memory": 35394, "step": 194360}
{"lr": 0.00019838733342988858, "data_time": 0.19085798263549805, "grad_norm": 0.5905786544084549, "loss": 0.46045025289058683, "time": 1.5138803243637085, "epoch": 622, "memory": 35394, "step": 194473}
{"lr": 0.0001977092816231897, "data_time": 0.002347731590270996, "grad_norm": 0.6140859812498093, "loss": 0.4632310152053833, "time": 1.6031369686126709, "epoch": 622, "memory": 35394, "step": 194573}
{"lr": 0.00019703222712144321, "data_time": 0.0026740789413452148, "grad_norm": 0.6496687471866608, "loss": 0.4581210404634476, "time": 1.4946187019348145, "epoch": 622, "memory": 35394, "step": 194673}
{"lr": 0.00019626835714870304, "data_time": 0.4680037498474121, "grad_norm": 0.5983554691076278, "loss": 0.45450828671455384, "time": 1.632898712158203, "epoch": 623, "memory": 35394, "step": 194786}
{"lr": 0.0001955934306415685, "data_time": 0.002367591857910156, "grad_norm": 0.5682881027460098, "loss": 0.45435277819633485, "time": 1.490851092338562, "epoch": 623, "memory": 35394, "step": 194886}
{"lr": 0.00019491950494292406, "data_time": 0.002891683578491211, "grad_norm": 0.564924493432045, "loss": 0.4523267686367035, "time": 1.4884487390518188, "epoch": 623, "memory": 35394, "step": 194986}
{"lr": 0.00019415917472751743, "data_time": 0.4429797887802124, "grad_norm": 0.6022585391998291, "loss": 0.4573495924472809, "time": 1.5275803089141846, "epoch": 624, "memory": 35394, "step": 195099}
{"lr": 0.00019348738446874487, "data_time": 0.002057170867919922, "grad_norm": 0.5972998559474945, "loss": 0.457385727763176, "time": 1.4851656913757325, "epoch": 624, "memory": 35394, "step": 195199}
{"lr": 0.0001928165985057659, "data_time": 0.0031299352645874023, "grad_norm": 0.5949891388416291, "loss": 0.45793585777282714, "time": 1.4776811838150024, "epoch": 624, "memory": 35394, "step": 195299}
{"lr": 0.00019205982038192733, "data_time": 0.4822899103164673, "grad_norm": 0.6694034278392792, "loss": 0.45880845487117766, "time": 1.5206782341003418, "epoch": 625, "memory": 35394, "step": 195412}
{"lr": 0.00019139117726943767, "data_time": 0.24415988922119142, "grad_norm": 0.6214432954788208, "loss": 0.46197680532932284, "time": 1.536284375190735, "epoch": 625, "memory": 35394, "step": 195512}
{"lr": 0.00019072354192375247, "data_time": 0.0024519443511962892, "grad_norm": 0.6466541767120362, "loss": 0.4552276819944382, "time": 1.5081780433654786, "epoch": 625, "memory": 35394, "step": 195612}
{"lr": 0.00018997032816809453, "data_time": 0.4739311695098877, "grad_norm": 0.7282273530960083, "loss": 0.4586802631616592, "time": 1.506712222099304, "epoch": 626, "memory": 35394, "step": 195725}
{"lr": 0.0001893048430487552, "data_time": 0.0020268917083740234, "grad_norm": 0.5901078313589097, "loss": 0.45725829899311066, "time": 1.5200907468795777, "epoch": 626, "memory": 35394, "step": 195825}
{"lr": 0.00018864036915088335, "data_time": 0.002792954444885254, "grad_norm": 0.5803373336791993, "loss": 0.4603756874799728, "time": 1.4862766504287719, "epoch": 626, "memory": 35394, "step": 195925}
{"lr": 0.00018789073198219517, "data_time": 0.18479387760162352, "grad_norm": 0.5642473012208938, "loss": 0.45713297426700594, "time": 1.486422562599182, "epoch": 627, "memory": 35394, "step": 196038}
{"lr": 0.0001872284156516435, "data_time": 0.0025880098342895507, "grad_norm": 0.6103913724422455, "loss": 0.4565363585948944, "time": 1.5034697294235229, "epoch": 627, "memory": 35394, "step": 196138}
{"lr": 0.00018656711398081863, "data_time": 0.0029042243957519533, "grad_norm": 0.6460892468690872, "loss": 0.456474506855011, "time": 1.5093944072723389, "epoch": 627, "memory": 35394, "step": 196238}
{"lr": 0.00018582106555987008, "data_time": 0.26101012229919435, "grad_norm": 0.6558946907520294, "loss": 0.45904851853847506, "time": 1.5094037294387816, "epoch": 628, "memory": 35394, "step": 196351}
{"lr": 0.00018516192876233994, "data_time": 0.0019234418869018555, "grad_norm": 0.5808244049549103, "loss": 0.4604679733514786, "time": 1.4996693849563598, "epoch": 628, "memory": 35394, "step": 196451}
{"lr": 0.00018450381004633435, "data_time": 0.0027101755142211912, "grad_norm": 0.5908566147089005, "loss": 0.4584722638130188, "time": 1.4727388143539428, "epoch": 628, "memory": 35394, "step": 196551}
{"lr": 0.0001837613624756778, "data_time": 0.350780725479126, "grad_norm": 0.5904825359582901, "loss": 0.45847649276256563, "time": 1.4112923860549926, "epoch": 629, "memory": 35394, "step": 196664}
{"lr": 0.0001831054159038224, "data_time": 0.002101445198059082, "grad_norm": 0.6067486763000488, "loss": 0.46078433096408844, "time": 1.4890531778335572, "epoch": 629, "memory": 35394, "step": 196764}
{"lr": 0.00018245049081877515, "data_time": 0.002332592010498047, "grad_norm": 0.5536105036735535, "loss": 0.45463736057281495, "time": 1.4770303249359131, "epoch": 629, "memory": 35394, "step": 196864}
{"lr": 0.00018171165614254932, "data_time": 0.324245810508728, "grad_norm": 0.6562404990196228, "loss": 0.4582914710044861, "time": 1.4895278215408325, "epoch": 630, "memory": 35394, "step": 196977}
{"lr": 0.00018105891043726993, "data_time": 0.002061152458190918, "grad_norm": 0.5872992128133774, "loss": 0.4572949826717377, "time": 1.4993608474731446, "epoch": 630, "memory": 35394, "step": 197077}
{"lr": 0.00018040718960751163, "data_time": 0.002841472625732422, "grad_norm": 0.6021020501852036, "loss": 0.46080576777458193, "time": 1.4612943172454833, "epoch": 630, "memory": 35394, "step": 197177}
{"lr": 0.00017967197981124568, "data_time": 0.09314918518066406, "grad_norm": 0.6545082449913024, "loss": 0.4610728591680527, "time": 1.511980700492859, "epoch": 631, "memory": 35394, "step": 197290}
{"lr": 0.00017902244556151868, "data_time": 0.002264714241027832, "grad_norm": 0.6578700423240662, "loss": 0.4621226489543915, "time": 1.4465371131896974, "epoch": 631, "memory": 35394, "step": 197390}
{"lr": 0.00017837393955939956, "data_time": 0.0027771711349487303, "grad_norm": 0.5941910207271576, "loss": 0.4551434934139252, "time": 1.4943088293075562, "epoch": 631, "memory": 35394, "step": 197490}
{"lr": 0.0001776423665698197, "data_time": 0.3876016139984131, "grad_norm": 0.5858952730894089, "loss": 0.4638731747865677, "time": 1.4965985536575317, "epoch": 632, "memory": 35394, "step": 197603}
{"lr": 0.00017699605431252506, "data_time": 0.002246594429016113, "grad_norm": 0.611446076631546, "loss": 0.4566203743219376, "time": 1.5266342878341674, "epoch": 632, "memory": 35394, "step": 197703}
{"lr": 0.0001763507736582438, "data_time": 0.002726626396179199, "grad_norm": 0.6030689597129821, "loss": 0.4580691695213318, "time": 1.5288950443267821, "epoch": 632, "memory": 35394, "step": 197803}
{"lr": 0.00017562284934307868, "data_time": 0.35678207874298096, "grad_norm": 0.6218982577323914, "loss": 0.45994298458099364, "time": 1.5085591316223144, "epoch": 633, "memory": 35394, "step": 197916}
{"lr": 0.00017497976956282752, "data_time": 0.0021439313888549803, "grad_norm": 0.5440431356430053, "loss": 0.4559579610824585, "time": 1.5250706434249879, "epoch": 633, "memory": 35394, "step": 198016}
{"lr": 0.00017433772472426077, "data_time": 0.002698850631713867, "grad_norm": 0.5836275696754456, "loss": 0.4588495880365372, "time": 1.533410096168518, "epoch": 633, "memory": 35394, "step": 198116}
{"lr": 0.00017361346089204982, "data_time": 0.3439712762832642, "grad_norm": 0.6193003684282303, "loss": 0.45428244173526766, "time": 1.540846014022827, "epoch": 634, "memory": 35394, "step": 198229}
{"lr": 0.00017297362402101594, "data_time": 0.0019283533096313477, "grad_norm": 0.6391590237617493, "loss": 0.46071707010269164, "time": 1.518014907836914, "epoch": 634, "memory": 35394, "step": 198329}
{"lr": 0.0001723348254135492, "data_time": 0.002728772163391113, "grad_norm": 0.6343146204948426, "loss": 0.4557459831237793, "time": 1.4691046476364136, "epoch": 634, "memory": 35394, "step": 198429}
{"lr": 0.00017161423381345005, "data_time": 0.4206789255142212, "grad_norm": 0.588042888045311, "loss": 0.46175412833690643, "time": 1.4982923030853272, "epoch": 635, "memory": 35394, "step": 198542}
{"lr": 0.00017097765023119956, "data_time": 0.16457343101501465, "grad_norm": 0.6745334506034851, "loss": 0.4589086890220642, "time": 1.515765404701233, "epoch": 635, "memory": 35394, "step": 198642}
{"lr": 0.00017034210821755596, "data_time": 0.0028059959411621095, "grad_norm": 0.600747263431549, "loss": 0.45174013674259184, "time": 1.346580934524536, "epoch": 635, "memory": 35394, "step": 198742}
{"lr": 0.00016962520053915594, "data_time": 0.38223183155059814, "grad_norm": 0.5778063416481019, "loss": 0.4542313665151596, "time": 1.4956583976745605, "epoch": 636, "memory": 35394, "step": 198855}
{"lr": 0.00016899188057248, "data_time": 0.07835793495178223, "grad_norm": 0.6533055245876312, "loss": 0.45330215990543365, "time": 1.5446667194366455, "epoch": 636, "memory": 35394, "step": 198955}
{"lr": 0.00016835960546255385, "data_time": 0.0027867555618286133, "grad_norm": 0.5547352373600006, "loss": 0.45659551918506625, "time": 1.5714869976043702, "epoch": 636, "memory": 35394, "step": 199055}
{"lr": 0.00016764639333567761, "data_time": 0.5071425199508667, "grad_norm": 0.5565404385328293, "loss": 0.4603860259056091, "time": 1.4991099119186402, "epoch": 637, "memory": 35394, "step": 199168}
{"lr": 0.00016701634725842438, "data_time": 0.1905494213104248, "grad_norm": 0.6020655333995819, "loss": 0.4564127892255783, "time": 1.4855632066726685, "epoch": 637, "memory": 35394, "step": 199268}
{"lr": 0.00016638734930911358, "data_time": 0.002939653396606445, "grad_norm": 0.5955411344766617, "loss": 0.45799513161182404, "time": 1.5335999488830567, "epoch": 637, "memory": 35394, "step": 199368}
{"lr": 0.00016567784430363632, "data_time": 0.20358493328094482, "grad_norm": 0.6042977392673492, "loss": 0.4597959339618683, "time": 1.5139747142791748, "epoch": 638, "memory": 35394, "step": 199481}
{"lr": 0.000165051082336544, "data_time": 0.002159404754638672, "grad_norm": 0.6216610968112946, "loss": 0.4552553713321686, "time": 1.4835384130477904, "epoch": 638, "memory": 35394, "step": 199581}
{"lr": 0.0001644253717515841, "data_time": 0.0026172399520874023, "grad_norm": 0.6075050354003906, "loss": 0.4555375874042511, "time": 1.4927724123001098, "epoch": 638, "memory": 35394, "step": 199681}
{"lr": 0.00016371958537724336, "data_time": 0.3052979946136475, "grad_norm": 0.611453852057457, "loss": 0.4548335462808609, "time": 1.5068635940551758, "epoch": 639, "memory": 35394, "step": 199794}
{"lr": 0.00016309611768777475, "data_time": 0.0019807815551757812, "grad_norm": 0.5691877275705337, "loss": 0.4537388652563095, "time": 1.539613699913025, "epoch": 639, "memory": 35394, "step": 199894}
{"lr": 0.0001624737046175726, "data_time": 0.002320551872253418, "grad_norm": 0.5931299895048141, "loss": 0.4577020823955536, "time": 1.4868125200271607, "epoch": 639, "memory": 35394, "step": 199994}
{"lr": 0.00016177164832378136, "data_time": 0.16354773044586182, "grad_norm": 0.6216659307479858, "loss": 0.46197803020477296, "time": 1.3551586151123047, "epoch": 640, "memory": 35394, "step": 200107}
{"lr": 0.0001611514850259583, "data_time": 0.001972198486328125, "grad_norm": 0.5795205295085907, "loss": 0.45290039777755736, "time": 1.4791353225708008, "epoch": 640, "memory": 35394, "step": 200207}
{"lr": 0.00016053237956742812, "data_time": 0.0027375221252441406, "grad_norm": 0.541269525885582, "loss": 0.4616907209157944, "time": 1.5315165281295777, "epoch": 640, "memory": 35394, "step": 200307}
{"lr": 0.00015983406474308912, "data_time": 0.42333953380584716, "grad_norm": 0.5593390166759491, "loss": 0.4588819980621338, "time": 1.4787928581237793, "epoch": 641, "memory": 35394, "step": 200420}
{"lr": 0.0001592172158973295, "data_time": 0.17508118152618407, "grad_norm": 0.6196498245000839, "loss": 0.4568918734788895, "time": 1.469118094444275, "epoch": 641, "memory": 35394, "step": 200520}
{"lr": 0.00015860142809372813, "data_time": 0.0026981592178344726, "grad_norm": 0.6301821112632752, "loss": 0.4578630954027176, "time": 1.5140965223312377, "epoch": 641, "memory": 35394, "step": 200620}
{"lr": 0.00015790686606704962, "data_time": 0.572533655166626, "grad_norm": 0.604551887512207, "loss": 0.45073025226593016, "time": 1.5055941343307495, "epoch": 642, "memory": 35394, "step": 200733}
{"lr": 0.00015729334168000368, "data_time": 0.12997920513153077, "grad_norm": 0.5944705039262772, "loss": 0.45782305002212526, "time": 1.3109756469726563, "epoch": 642, "memory": 35394, "step": 200833}
{"lr": 0.0001566808815207688, "data_time": 0.0027031898498535156, "grad_norm": 0.6456506729125977, "loss": 0.4596684992313385, "time": 1.5787033557891845, "epoch": 642, "memory": 35394, "step": 200933}
{"lr": 0.00015599008355907985, "data_time": 0.5360142230987549, "grad_norm": 0.644164577126503, "loss": 0.4505973935127258, "time": 1.456685447692871, "epoch": 643, "memory": 35394, "step": 201046}
{"lr": 0.0001553798935834673, "data_time": 0.13787901401519775, "grad_norm": 0.6202894270420074, "loss": 0.45845730006694796, "time": 1.512028408050537, "epoch": 643, "memory": 35394, "step": 201146}
{"lr": 0.00015477077100405494, "data_time": 0.002272796630859375, "grad_norm": 0.6054240167140961, "loss": 0.45616360902786257, "time": 1.5083559274673461, "epoch": 643, "memory": 35394, "step": 201246}
{"lr": 0.0001540837483136237, "data_time": 0.4503068447113037, "grad_norm": 0.6190340459346771, "loss": 0.4566222369670868, "time": 1.4677533388137818, "epoch": 644, "memory": 35394, "step": 201359}
{"lr": 0.0001534769026480726, "data_time": 0.18655431270599365, "grad_norm": 0.5591786414384842, "loss": 0.4591507017612457, "time": 1.3666127681732179, "epoch": 644, "memory": 35394, "step": 201459}
{"lr": 0.00015287112752979557, "data_time": 0.0026941776275634767, "grad_norm": 0.5921172142028809, "loss": 0.45909123718738554, "time": 1.57388117313385, "epoch": 644, "memory": 35394, "step": 201559}
{"lr": 0.00015218789125564668, "data_time": 0.430092978477478, "grad_norm": 0.6208554089069367, "loss": 0.4565631508827209, "time": 1.5236220121383668, "epoch": 645, "memory": 35394, "step": 201672}
{"lr": 0.00015158439974453377, "data_time": 0.14704496860504152, "grad_norm": 0.5867479294538498, "loss": 0.4626991659402847, "time": 1.5092929601669312, "epoch": 645, "memory": 35394, "step": 201772}
{"lr": 0.0001509819819144016, "data_time": 0.0023807287216186523, "grad_norm": 0.6300670087337494, "loss": 0.45732704401016233, "time": 1.494176435470581, "epoch": 645, "memory": 35394, "step": 201872}
{"lr": 0.00015030254314013557, "data_time": 0.4507366895675659, "grad_norm": 0.6045912116765976, "loss": 0.45837506651878357, "time": 1.4823008537292481, "epoch": 646, "memory": 35394, "step": 201985}
{"lr": 0.00014970241557342554, "data_time": 0.00720362663269043, "grad_norm": 0.6373814702033996, "loss": 0.4582427263259888, "time": 1.4786646842956543, "epoch": 646, "memory": 35394, "step": 202085}
{"lr": 0.000149103364803985, "data_time": 0.002553844451904297, "grad_norm": 0.5840985894203186, "loss": 0.45935330986976625, "time": 1.5143234729766846, "epoch": 646, "memory": 35394, "step": 202185}
{"lr": 0.00014842773455159874, "data_time": 0.24223752021789552, "grad_norm": 0.6198639065027237, "loss": 0.4588489532470703, "time": 1.4629765510559083, "epoch": 647, "memory": 35394, "step": 202298}
{"lr": 0.00014783098066468536, "data_time": 0.019613766670227052, "grad_norm": 0.6017158836126327, "loss": 0.4591652423143387, "time": 1.5064185380935669, "epoch": 647, "memory": 35394, "step": 202398}
{"lr": 0.00014723530667386264, "data_time": 0.0021355152130126953, "grad_norm": 0.5876849681138993, "loss": 0.45946877002716063, "time": 1.4826281785964965, "epoch": 647, "memory": 35394, "step": 202498}
{"lr": 0.00014656349590357013, "data_time": 0.4100576162338257, "grad_norm": 0.5808157026767731, "loss": 0.45594415068626404, "time": 1.5553017139434815, "epoch": 648, "memory": 35394, "step": 202611}
{"lr": 0.00014597012537711867, "data_time": 0.0021526336669921873, "grad_norm": 0.5914739698171616, "loss": 0.45565991699695585, "time": 1.4964560747146607, "epoch": 648, "memory": 35394, "step": 202711}
{"lr": 0.00014537783782806151, "data_time": 0.0029710769653320313, "grad_norm": 0.5293037801980972, "loss": 0.458294090628624, "time": 1.4817208766937255, "epoch": 648, "memory": 35394, "step": 202811}
{"lr": 0.00014470985743811638, "data_time": 0.2913050174713135, "grad_norm": 0.5775049567222595, "loss": 0.4561348080635071, "time": 1.481521773338318, "epoch": 649, "memory": 35394, "step": 202924}
{"lr": 0.00014411987989790655, "data_time": 0.0024022340774536135, "grad_norm": 0.6215729773044586, "loss": 0.46215463280677793, "time": 1.5155323266983032, "epoch": 649, "memory": 35394, "step": 203024}
{"lr": 0.00014353098839882573, "data_time": 0.002551555633544922, "grad_norm": 0.593265900015831, "loss": 0.46444415748119355, "time": 1.4869060516357422, "epoch": 649, "memory": 35394, "step": 203124}
{"lr": 0.00014286684922534495, "data_time": 0.3252631902694702, "grad_norm": 0.6087060630321502, "loss": 0.45985875427722933, "time": 1.5217123985290528, "epoch": 650, "memory": 35394, "step": 203237}
{"lr": 0.0001422802742421148, "data_time": 0.0027477264404296873, "grad_norm": 0.6331838130950928, "loss": 0.4542069315910339, "time": 1.4899157285690308, "epoch": 650, "memory": 35394, "step": 203337}
{"lr": 0.00014169478834613066, "data_time": 0.0029901742935180666, "grad_norm": 0.5599701017141342, "loss": 0.45956091582775116, "time": 1.4830462694168092, "epoch": 650, "memory": 35394, "step": 203437}
{"lr": 0.00014103450116291746, "data_time": 0.09039602279663086, "grad_norm": 0.5820776700973511, "loss": 0.45930692851543425, "time": 1.2131474494934082, "epoch": 651, "memory": 35394, "step": 203550}
{"lr": 0.0001404513382522086, "data_time": 0.0023269176483154295, "grad_norm": 0.5894873023033143, "loss": 0.46217744052410126, "time": 1.5060668468475342, "epoch": 651, "memory": 35394, "step": 203650}
{"lr": 0.0001398692674571944, "data_time": 0.0029006004333496094, "grad_norm": 0.5847391694784164, "loss": 0.452800789475441, "time": 1.5272506952285767, "epoch": 651, "memory": 35394, "step": 203750}
{"lr": 0.00013921284297556448, "data_time": 0.4401864051818848, "grad_norm": 0.6084377765655518, "loss": 0.4548187404870987, "time": 1.4635892391204834, "epoch": 652, "memory": 35394, "step": 203863}
{"lr": 0.0001386331015975664, "data_time": 0.21313872337341308, "grad_norm": 0.666178959608078, "loss": 0.45395594239234927, "time": 1.4883691310882567, "epoch": 652, "memory": 35394, "step": 203963}
{"lr": 0.000138054455345996, "data_time": 0.08561673164367675, "grad_norm": 0.5680290192365647, "loss": 0.45502259135246276, "time": 1.470278549194336, "epoch": 652, "memory": 35394, "step": 204063}
{"lr": 0.00013740190421460366, "data_time": 0.3722574472427368, "grad_norm": 0.6011995136737823, "loss": 0.46618102192878724, "time": 1.4990053176879883, "epoch": 653, "memory": 35394, "step": 204176}
{"lr": 0.0001368255937740014, "data_time": 0.0018523693084716796, "grad_norm": 0.5559994220733643, "loss": 0.46039126217365267, "time": 1.3164072513580323, "epoch": 653, "memory": 35394, "step": 204276}
{"lr": 0.00013625038145279405, "data_time": 0.0027178287506103515, "grad_norm": 0.5878157168626785, "loss": 0.453668075799942, "time": 1.498523736000061, "epoch": 653, "memory": 35394, "step": 204376}
{"lr": 0.00013560171425745896, "data_time": 0.3335241794586182, "grad_norm": 0.6021636009216309, "loss": 0.4585204511880875, "time": 1.4930536031723023, "epoch": 654, "memory": 35394, "step": 204489}
{"lr": 0.00013502884410327988, "data_time": 0.002042412757873535, "grad_norm": 0.6054206222295762, "loss": 0.45180230736732485, "time": 1.4689949035644532, "epoch": 654, "memory": 35394, "step": 204589}
{"lr": 0.0001344570750436494, "data_time": 0.00272829532623291, "grad_norm": 0.6108379185199737, "loss": 0.4618451327085495, "time": 1.4978961944580078, "epoch": 654, "memory": 35394, "step": 204689}
{"lr": 0.0001338123023071855, "data_time": 0.5622831106185913, "grad_norm": 0.6067492723464966, "loss": 0.4612774908542633, "time": 1.5331340074539184, "epoch": 655, "memory": 35394, "step": 204802}
{"lr": 0.00013324288173264794, "data_time": 0.30476980209350585, "grad_norm": 0.5270440340042114, "loss": 0.45771121680736543, "time": 1.489989733695984, "epoch": 655, "memory": 35394, "step": 204902}
{"lr": 0.00013267456520995073, "data_time": 0.002608919143676758, "grad_norm": 0.6427339553833008, "loss": 0.45013793706893923, "time": 1.5901789665222168, "epoch": 655, "memory": 35394, "step": 205002}
{"lr": 0.0001320336973919952, "data_time": 0.42593319416046144, "grad_norm": 0.6160616517066956, "loss": 0.45568853318691255, "time": 1.5167390823364257, "epoch": 656, "memory": 35394, "step": 205115}
{"lr": 0.00013146773563435854, "data_time": 0.0019665956497192383, "grad_norm": 0.5138365447521209, "loss": 0.453190290927887, "time": 1.5215694427490234, "epoch": 656, "memory": 35394, "step": 205215}
{"lr": 0.00013090288086794295, "data_time": 0.00271146297454834, "grad_norm": 0.6401343882083893, "loss": 0.45665638744831083, "time": 1.4712866306304933, "epoch": 656, "memory": 35394, "step": 205315}
{"lr": 0.0001302659283647863, "data_time": 0.3829490184783936, "grad_norm": 0.5502463459968567, "loss": 0.45690740644931793, "time": 1.5149660587310791, "epoch": 657, "memory": 35394, "step": 205428}
{"lr": 0.00012970343460519924, "data_time": 0.0026459693908691406, "grad_norm": 0.639063709974289, "loss": 0.4613881915807724, "time": 1.511761736869812, "epoch": 657, "memory": 35394, "step": 205528}
{"lr": 0.00012914205075825665, "data_time": 0.002619028091430664, "grad_norm": 0.572563773393631, "loss": 0.4628710955381393, "time": 1.4795682430267334, "epoch": 657, "memory": 35394, "step": 205628}
{"lr": 0.00012850902390267502, "data_time": 0.2619884252548218, "grad_norm": 0.6296266436576843, "loss": 0.4603515565395355, "time": 1.4708629846572876, "epoch": 658, "memory": 35394, "step": 205741}
{"lr": 0.0001279500072660275, "data_time": 0.002042222023010254, "grad_norm": 0.5870114684104919, "loss": 0.457356858253479, "time": 1.5839374542236329, "epoch": 658, "memory": 35394, "step": 205841}
{"lr": 0.00012739210344544323, "data_time": 0.0027482032775878905, "grad_norm": 0.5764827579259872, "loss": 0.45201392769813536, "time": 1.5254395723342895, "epoch": 658, "memory": 35394, "step": 205941}
{"lr": 0.00012676301250652932, "data_time": 0.39144320487976075, "grad_norm": 0.6072818517684937, "loss": 0.45916119813919065, "time": 1.4625054121017456, "epoch": 659, "memory": 35394, "step": 206054}
{"lr": 0.00012620748206130595, "data_time": 0.14191112518310547, "grad_norm": 0.6225778102874756, "loss": 0.4585974425077438, "time": 1.4947101831436158, "epoch": 659, "memory": 35394, "step": 206154}
{"lr": 0.00012565306731751128, "data_time": 0.0023151397705078124, "grad_norm": 0.6250585913658142, "loss": 0.4568087697029114, "time": 1.577182126045227, "epoch": 659, "memory": 35394, "step": 206254}
{"lr": 0.00012502792250050884, "data_time": 0.3732069253921509, "grad_norm": 0.6007469356060028, "loss": 0.4621353656053543, "time": 1.439829659461975, "epoch": 660, "memory": 35394, "step": 206367}
{"lr": 0.00012447588725863996, "data_time": 0.002112126350402832, "grad_norm": 0.6463676810264587, "loss": 0.45475906431674956, "time": 1.499531078338623, "epoch": 660, "memory": 35394, "step": 206467}
{"lr": 0.0001239249705854657, "data_time": 0.002378988265991211, "grad_norm": 0.5958064496517181, "loss": 0.45192932784557344, "time": 1.597442054748535, "epoch": 660, "memory": 35394, "step": 206567}
{"lr": 0.00012330378203160387, "data_time": 0.4528591871261597, "grad_norm": 0.6382226824760437, "loss": 0.45667547881603243, "time": 1.5464436292648316, "epoch": 661, "memory": 35394, "step": 206680}
{"lr": 0.00012275525094831952, "data_time": 0.06784639358520508, "grad_norm": 0.5298264056444169, "loss": 0.46226389706134796, "time": 1.4704353094100953, "epoch": 661, "memory": 35394, "step": 206780}
{"lr": 0.00012220784128285077, "data_time": 0.002570796012878418, "grad_norm": 0.5740325063467026, "loss": 0.46117747724056246, "time": 1.6435017824172973, "epoch": 661, "memory": 35394, "step": 206880}
{"lr": 0.00012159061906917924, "data_time": 0.49961755275726316, "grad_norm": 0.6026416629552841, "loss": 0.45838444828987124, "time": 1.4762629508972167, "epoch": 662, "memory": 35394, "step": 206993}
{"lr": 0.00012104560104286437, "data_time": 0.1756763458251953, "grad_norm": 0.6103036731481553, "loss": 0.45507611334323883, "time": 1.563780379295349, "epoch": 662, "memory": 35394, "step": 207093}
{"lr": 0.00012050170726529454, "data_time": 0.00244448184967041, "grad_norm": 0.5923826068639755, "loss": 0.46434213519096373, "time": 1.6009593963623048, "epoch": 662, "memory": 35394, "step": 207193}
{"lr": 0.0001198884614045197, "data_time": 0.28563926219940183, "grad_norm": 0.6242716610431671, "loss": 0.4527875244617462, "time": 1.6081276655197143, "epoch": 663, "memory": 35394, "step": 207306}
{"lr": 0.00011934696527656958, "data_time": 0.0020861387252807616, "grad_norm": 0.5850646376609803, "loss": 0.45500326752662656, "time": 1.606808567047119, "epoch": 663, "memory": 35394, "step": 207406}
{"lr": 0.00011880659621005613, "data_time": 0.0025831937789916994, "grad_norm": 0.6034570515155793, "loss": 0.4584624618291855, "time": 1.6290698051452637, "epoch": 663, "memory": 35394, "step": 207506}
{"lr": 0.00011819733665038005, "data_time": 0.08431351184844971, "grad_norm": 0.5451927840709686, "loss": 0.4551060199737549, "time": 1.5791436910629273, "epoch": 664, "memory": 35394, "step": 207619}
{"lr": 0.00011765937120505742, "data_time": 0.0027232646942138674, "grad_norm": 0.5998818337917328, "loss": 0.45927879214286804, "time": 1.6435339450836182, "epoch": 664, "memory": 35394, "step": 207719}
{"lr": 0.00011712253561557938, "data_time": 0.003058767318725586, "grad_norm": 0.6535022854804993, "loss": 0.4611719369888306, "time": 1.6594682931900024, "epoch": 664, "memory": 35394, "step": 207819}
{"lr": 0.00011651727224053702, "data_time": 0.5099926948547363, "grad_norm": 0.6186152815818786, "loss": 0.45572868883609774, "time": 1.5654015302658082, "epoch": 665, "memory": 35394, "step": 207932}
{"lr": 0.00011598284620482918, "data_time": 0.0021628141403198242, "grad_norm": 0.5186330765485764, "loss": 0.45577001869678496, "time": 1.56277437210083, "epoch": 665, "memory": 35394, "step": 208032}
{"lr": 0.00011544955280104487, "data_time": 0.0022866487503051757, "grad_norm": 0.6572570025920867, "loss": 0.4541894197463989, "time": 1.7039907455444336, "epoch": 665, "memory": 35394, "step": 208132}
{"lr": 0.00011484829542934407, "data_time": 0.41164729595184324, "grad_norm": 0.6225551128387451, "loss": 0.46232262551784514, "time": 1.5451319932937622, "epoch": 666, "memory": 35394, "step": 208245}
{"lr": 0.00011431741747282104, "data_time": 0.0022287368774414062, "grad_norm": 0.5636282294988633, "loss": 0.45474712550640106, "time": 1.4732435464859008, "epoch": 666, "memory": 35394, "step": 208345}
{"lr": 0.00011378767490592673, "data_time": 0.002733325958251953, "grad_norm": 0.5392046242952346, "loss": 0.44693114459514616, "time": 1.5555570125579834, "epoch": 666, "memory": 35394, "step": 208445}
{"lr": 0.00011319043329128953, "data_time": 0.3595449924468994, "grad_norm": 0.5484383404254913, "loss": 0.4560818880796432, "time": 1.5580251455307006, "epoch": 667, "memory": 35394, "step": 208558}
{"lr": 0.00011266311202596381, "data_time": 0.0021033525466918946, "grad_norm": 0.6430172353982926, "loss": 0.4587183237075806, "time": 1.5304063320159913, "epoch": 667, "memory": 35394, "step": 208658}
{"lr": 0.00011213692888955342, "data_time": 0.002782273292541504, "grad_norm": 0.5967853546142579, "loss": 0.45437573194503783, "time": 1.6002289533615113, "epoch": 667, "memory": 35394, "step": 208758}
{"lr": 0.00011154371272055728, "data_time": 0.3095091819763184, "grad_norm": 0.5990992248058319, "loss": 0.4601636856794357, "time": 1.5560492515563964, "epoch": 668, "memory": 35394, "step": 208871}
{"lr": 0.00011101995670074354, "data_time": 0.0024502038955688476, "grad_norm": 0.6414493381977081, "loss": 0.45623907148838044, "time": 1.584131932258606, "epoch": 668, "memory": 35394, "step": 208971}
{"lr": 0.0001104973415306691, "data_time": 0.003050684928894043, "grad_norm": 0.5689112156629562, "loss": 0.4570414662361145, "time": 1.519471526145935, "epoch": 668, "memory": 35394, "step": 209071}
{"lr": 0.00010990816043058961, "data_time": 0.41105427742004397, "grad_norm": 0.5823472708463668, "loss": 0.4562831521034241, "time": 1.5627211332321167, "epoch": 669, "memory": 35394, "step": 209184}
{"lr": 0.00010938797815276698, "data_time": 0.0022078990936279298, "grad_norm": 0.6091338187456131, "loss": 0.4566074967384338, "time": 1.5366122245788574, "epoch": 669, "memory": 35394, "step": 209284}
{"lr": 0.00010886893942699995, "data_time": 0.0023509740829467775, "grad_norm": 0.5968838900327682, "loss": 0.4589085251092911, "time": 1.5104755640029908, "epoch": 669, "memory": 35394, "step": 209384}
{"lr": 0.00010828380295365542, "data_time": 0.5965963363647461, "grad_norm": 0.6300504446029663, "loss": 0.45470289289951327, "time": 1.5344863891601563, "epoch": 670, "memory": 35394, "step": 209497}
{"lr": 0.00010776720285632866, "data_time": 0.3105576276779175, "grad_norm": 0.6109273076057434, "loss": 0.4569434404373169, "time": 1.5340564966201782, "epoch": 670, "memory": 35394, "step": 209597}
{"lr": 0.00010725174899482342, "data_time": 0.002591252326965332, "grad_norm": 0.6203583270311356, "loss": 0.4485148161649704, "time": 1.5469893932342529, "epoch": 670, "memory": 35394, "step": 209697}
{"lr": 0.00010667066664041893, "data_time": 0.5539749145507813, "grad_norm": 0.6859166711568833, "loss": 0.45491284430027007, "time": 1.492490577697754, "epoch": 671, "memory": 35394, "step": 209810}
{"lr": 0.00010615765710398197, "data_time": 0.4192476987838745, "grad_norm": 0.6155709594488143, "loss": 0.4605124443769455, "time": 1.5443883180618285, "epoch": 671, "memory": 35394, "step": 209910}
{"lr": 0.00010564579646853804, "data_time": 0.003223896026611328, "grad_norm": 0.5854030430316925, "loss": 0.45787923634052274, "time": 1.5493571281433105, "epoch": 671, "memory": 35394, "step": 210010}
{"lr": 0.00010506877765951243, "data_time": 0.6112744569778442, "grad_norm": 0.5664922893047333, "loss": 0.4589253604412079, "time": 1.5460806846618653, "epoch": 672, "memory": 35394, "step": 210123}
{"lr": 0.0001045593670061122, "data_time": 0.1250993251800537, "grad_norm": 0.5938596040010452, "loss": 0.45644947290420534, "time": 1.5492676496505737, "epoch": 672, "memory": 35394, "step": 210223}
{"lr": 0.00010405110790023887, "data_time": 0.002240777015686035, "grad_norm": 0.6057829141616822, "loss": 0.45785359144210813, "time": 1.751986312866211, "epoch": 672, "memory": 35394, "step": 210323}
{"lr": 0.00010347816199711088, "data_time": 0.5220611810684204, "grad_norm": 0.5895208984613418, "loss": 0.4577414572238922, "time": 1.5420966625213623, "epoch": 673, "memory": 35394, "step": 210436}
{"lr": 0.00010297235849051272, "data_time": 0.002718496322631836, "grad_norm": 0.5759982615709305, "loss": 0.46193557381629946, "time": 1.5203978300094605, "epoch": 673, "memory": 35394, "step": 210536}
{"lr": 0.00010246770915929482, "data_time": 0.00272517204284668, "grad_norm": 0.5699781447649002, "loss": 0.45833171606063844, "time": 1.5480802536010743, "epoch": 673, "memory": 35394, "step": 210636}
{"lr": 0.00010189884545651161, "data_time": 0.3407046556472778, "grad_norm": 0.5586802124977112, "loss": 0.4501643717288971, "time": 1.550977873802185, "epoch": 674, "memory": 35394, "step": 210749}
{"lr": 0.0001013966573019656, "data_time": 0.0024539947509765623, "grad_norm": 0.5657594919204711, "loss": 0.4623663753271103, "time": 1.5701689004898072, "epoch": 674, "memory": 35394, "step": 210849}
{"lr": 0.00010089562593192885, "data_time": 0.003100085258483887, "grad_norm": 0.626784598827362, "loss": 0.46123212575912476, "time": 1.4864736318588256, "epoch": 674, "memory": 35394, "step": 210949}
{"lr": 0.00010033085365771542, "data_time": 0.30337135791778563, "grad_norm": 0.6070349484682083, "loss": 0.4552170544862747, "time": 1.5397397756576539, "epoch": 675, "memory": 35394, "step": 211062}
{"lr": 9.983228900182174e-05, "data_time": 0.0025952816009521484, "grad_norm": 0.5962685525417328, "loss": 0.46001838743686674, "time": 1.537744927406311, "epoch": 675, "memory": 35394, "step": 211162}
{"lr": 9.933488372080124e-05, "data_time": 0.0030071020126342775, "grad_norm": 0.5912318885326385, "loss": 0.45566644370555875, "time": 1.562260627746582, "epoch": 675, "memory": 35394, "step": 211262}
{"lr": 9.877421203701094e-05, "data_time": 0.317606520652771, "grad_norm": 0.5619223088026046, "loss": 0.4601077735424042, "time": 1.5227688312530518, "epoch": 676, "memory": 35394, "step": 211375}
{"lr": 9.827927896758872e-05, "data_time": 0.002315664291381836, "grad_norm": 0.5481217861175537, "loss": 0.45242986381053923, "time": 1.5902018308639527, "epoch": 676, "memory": 35394, "step": 211475}
{"lr": 9.778550784459651e-05, "data_time": 0.002975320816040039, "grad_norm": 0.6169865220785141, "loss": 0.46260654628276826, "time": 1.5258801937103272, "epoch": 676, "memory": 35394, "step": 211575}
{"lr": 9.722894584656135e-05, "data_time": 0.26725614070892334, "grad_norm": 0.5745651870965958, "loss": 0.4545161217451096, "time": 1.5766063928604126, "epoch": 677, "memory": 35394, "step": 211688}
{"lr": 9.673765239251756e-05, "data_time": 0.002407336235046387, "grad_norm": 0.5328699380159378, "loss": 0.4584734678268433, "time": 1.530838131904602, "epoch": 677, "memory": 35394, "step": 211788}
{"lr": 9.624752343761102e-05, "data_time": 0.0024190664291381834, "grad_norm": 0.5815343976020813, "loss": 0.4580894440412521, "time": 1.5586114645004272, "epoch": 677, "memory": 35394, "step": 211888}
{"lr": 9.56950801539956e-05, "data_time": 0.22672924995422364, "grad_norm": 0.5615183532238006, "loss": 0.4569973826408386, "time": 1.5649680852890016, "epoch": 678, "memory": 35394, "step": 212001}
{"lr": 9.520743428519462e-05, "data_time": 0.002083873748779297, "grad_norm": 0.6346734285354614, "loss": 0.4611951053142548, "time": 1.5581304788589478, "epoch": 678, "memory": 35394, "step": 212101}
{"lr": 9.472095544934745e-05, "data_time": 0.0028358221054077147, "grad_norm": 0.5291532069444657, "loss": 0.45550725162029265, "time": 1.5641655445098877, "epoch": 678, "memory": 35394, "step": 212201}
{"lr": 9.417263984200148e-05, "data_time": 0.42909674644470214, "grad_norm": 0.5402138650417327, "loss": 0.45522319376468656, "time": 1.607773232460022, "epoch": 679, "memory": 35394, "step": 212314}
{"lr": 9.368864946913593e-05, "data_time": 0.002217817306518555, "grad_norm": 0.5557768315076828, "loss": 0.4519933104515076, "time": 1.4468154191970826, "epoch": 679, "memory": 35394, "step": 212414}
{"lr": 9.320582864410849e-05, "data_time": 0.0021313905715942385, "grad_norm": 0.5899712055921554, "loss": 0.45861652195453645, "time": 1.5463493585586547, "epoch": 679, "memory": 35394, "step": 212514}
{"lr": 9.26616496079212e-05, "data_time": 0.5004173040390014, "grad_norm": 0.5508352905511856, "loss": 0.4571260869503021, "time": 1.5675169944763183, "epoch": 680, "memory": 35394, "step": 212627}
{"lr": 9.218132258238332e-05, "data_time": 0.06487607955932617, "grad_norm": 0.5706379562616348, "loss": 0.46083061397075653, "time": 1.545665431022644, "epoch": 680, "memory": 35394, "step": 212727}
{"lr": 9.170216760059537e-05, "data_time": 0.0027361154556274415, "grad_norm": 0.6027405440807343, "loss": 0.4573623090982437, "time": 1.5533807516098022, "epoch": 680, "memory": 35394, "step": 212827}
{"lr": 9.116213396335243e-05, "data_time": 0.6061503171920777, "grad_norm": 0.5565765708684921, "loss": 0.45609150826931, "time": 1.5411153078079223, "epoch": 681, "memory": 35394, "step": 212940}
{"lr": 9.068547807710678e-05, "data_time": 0.22237279415130615, "grad_norm": 0.6120332300662994, "loss": 0.45483088195323945, "time": 1.5857033491134644, "epoch": 681, "memory": 35394, "step": 213040}
{"lr": 9.020999671150988e-05, "data_time": 0.0027760744094848635, "grad_norm": 0.5917708039283752, "loss": 0.4587224870920181, "time": 1.558027195930481, "epoch": 681, "memory": 35394, "step": 213140}
{"lr": 8.967411723374849e-05, "data_time": 0.2662954568862915, "grad_norm": 0.6223403453826905, "loss": 0.4514028489589691, "time": 1.553421139717102, "epoch": 682, "memory": 35394, "step": 213253}
{"lr": 8.92011402192058e-05, "data_time": 0.0021384000778198243, "grad_norm": 0.5979145646095276, "loss": 0.4544138163328171, "time": 1.5591777563095093, "epoch": 682, "memory": 35394, "step": 213353}
{"lr": 8.872934018315737e-05, "data_time": 0.0024817228317260743, "grad_norm": 0.5517389893531799, "loss": 0.4580913335084915, "time": 1.6588711500167848, "epoch": 682, "memory": 35394, "step": 213453}
{"lr": 8.81976235580259e-05, "data_time": 0.3747780561447144, "grad_norm": 0.5415186077356339, "loss": 0.4542349547147751, "time": 1.5512708187103272, "epoch": 683, "memory": 35394, "step": 213566}
{"lr": 8.772833308791766e-05, "data_time": 0.0019155979156494141, "grad_norm": 0.5290867745876312, "loss": 0.4659025460481644, "time": 1.5549160957336425, "epoch": 683, "memory": 35394, "step": 213666}
{"lr": 8.726022203505564e-05, "data_time": 0.0030971765518188477, "grad_norm": 0.5520845383405686, "loss": 0.46659691631793976, "time": 1.5650106430053712, "epoch": 683, "memory": 35394, "step": 213766}
{"lr": 8.673267688817139e-05, "data_time": 0.4626002788543701, "grad_norm": 0.553689593076706, "loss": 0.4539417952299118, "time": 1.560614776611328, "epoch": 684, "memory": 35394, "step": 213879}
{"lr": 8.626708057542489e-05, "data_time": 0.026269030570983887, "grad_norm": 0.5956557035446167, "loss": 0.45791822373867036, "time": 1.5923821449279785, "epoch": 684, "memory": 35394, "step": 213979}
{"lr": 8.580266609954433e-05, "data_time": 0.0024014711380004883, "grad_norm": 0.58118736743927, "loss": 0.4527595728635788, "time": 1.7422907590866088, "epoch": 684, "memory": 35394, "step": 214079}
{"lr": 8.527930098885413e-05, "data_time": 0.3486656188964844, "grad_norm": 0.5640002846717834, "loss": 0.4564574211835861, "time": 1.5404855489730835, "epoch": 685, "memory": 35394, "step": 214192}
{"lr": 8.481740638646945e-05, "data_time": 0.0023063182830810546, "grad_norm": 0.5313880413770675, "loss": 0.45632301867008207, "time": 1.559109091758728, "epoch": 685, "memory": 35394, "step": 214292}
{"lr": 8.43566960213981e-05, "data_time": 0.0027637720108032227, "grad_norm": 0.569439274072647, "loss": 0.4586038202047348, "time": 1.6135172843933105, "epoch": 685, "memory": 35394, "step": 214392}
{"lr": 8.383751943703953e-05, "data_time": 0.4448235511779785, "grad_norm": 0.6122017711400985, "loss": 0.4566127806901932, "time": 1.5550392150878907, "epoch": 686, "memory": 35394, "step": 214505}
{"lr": 8.337933403796622e-05, "data_time": 0.2194810390472412, "grad_norm": 0.5403511226177216, "loss": 0.45202714800834654, "time": 1.5494340658187866, "epoch": 686, "memory": 35394, "step": 214605}
{"lr": 8.292233525744319e-05, "data_time": 0.0023023605346679686, "grad_norm": 0.5945889115333557, "loss": 0.4591774672269821, "time": 1.5663992404937743, "epoch": 686, "memory": 35394, "step": 214705}
{"lr": 8.240735562160665e-05, "data_time": 0.37122962474822996, "grad_norm": 0.5748916000127793, "loss": 0.4546218901872635, "time": 1.6222759246826173, "epoch": 687, "memory": 35394, "step": 214818}
{"lr": 8.19528868586239e-05, "data_time": 0.002157926559448242, "grad_norm": 0.538899490237236, "loss": 0.4607570439577103, "time": 1.5762009382247926, "epoch": 687, "memory": 35394, "step": 214918}
{"lr": 8.149960707617834e-05, "data_time": 0.0021753787994384767, "grad_norm": 0.618314516544342, "loss": 0.4564055114984512, "time": 1.8048780918121339, "epoch": 687, "memory": 35394, "step": 215018}
{"lr": 8.098883274297025e-05, "data_time": 0.3625020027160645, "grad_norm": 0.6141729205846786, "loss": 0.4561437964439392, "time": 1.5836377382278441, "epoch": 688, "memory": 35394, "step": 215131}
{"lr": 8.053808798856478e-05, "data_time": 0.001971530914306641, "grad_norm": 0.5956287443637848, "loss": 0.4565456658601761, "time": 1.5201330423355102, "epoch": 688, "memory": 35394, "step": 215231}
{"lr": 8.00885345573951e-05, "data_time": 0.002691841125488281, "grad_norm": 0.5955277889966964, "loss": 0.4544478803873062, "time": 1.542170810699463, "epoch": 688, "memory": 35394, "step": 215331}
{"lr": 7.958197381270255e-05, "data_time": 0.47609875202178953, "grad_norm": 0.5781329542398452, "loss": 0.4592869520187378, "time": 1.5842019319534302, "epoch": 689, "memory": 35394, "step": 215444}
{"lr": 7.91349603789491e-05, "data_time": 0.2007617712020874, "grad_norm": 0.5359909802675247, "loss": 0.45530055165290834, "time": 1.533629584312439, "epoch": 689, "memory": 35394, "step": 215544}
{"lr": 7.86891405918044e-05, "data_time": 0.0021775484085083006, "grad_norm": 0.584776771068573, "loss": 0.45249241292476655, "time": 1.5773392200469971, "epoch": 689, "memory": 35394, "step": 215644}
{"lr": 7.818680165316113e-05, "data_time": 0.16954753398895264, "grad_norm": 0.5300792425870895, "loss": 0.45338834524154664, "time": 1.5209959983825683, "epoch": 690, "memory": 35394, "step": 215757}
{"lr": 7.774352679160425e-05, "data_time": 0.0020591259002685548, "grad_norm": 0.5956376612186431, "loss": 0.4521362066268921, "time": 1.5424150228500366, "epoch": 690, "memory": 35394, "step": 215857}
{"lr": 7.730144788066621e-05, "data_time": 0.0027124643325805663, "grad_norm": 0.6044490844011307, "loss": 0.45696496665477754, "time": 1.5998022079467773, "epoch": 690, "memory": 35394, "step": 215957}
{"lr": 7.680333889711849e-05, "data_time": 0.531805443763733, "grad_norm": 0.5697890490293502, "loss": 0.45385181307792666, "time": 1.5270342350006103, "epoch": 691, "memory": 35394, "step": 216070}
{"lr": 7.636380979865488e-05, "data_time": 0.11218860149383544, "grad_norm": 0.5308011800050736, "loss": 0.45552716255187986, "time": 1.5428216218948365, "epoch": 691, "memory": 35394, "step": 216170}
{"lr": 7.592547893541891e-05, "data_time": 0.0026600837707519533, "grad_norm": 0.6058697193861008, "loss": 0.4626846045255661, "time": 1.5068730354309081, "epoch": 691, "memory": 35394, "step": 216270}
{"lr": 7.54316079873942e-05, "data_time": 0.33205289840698243, "grad_norm": 0.539072361588478, "loss": 0.4548801302909851, "time": 1.535478901863098, "epoch": 692, "memory": 35394, "step": 216383}
{"lr": 7.499583178215629e-05, "data_time": 0.002102160453796387, "grad_norm": 0.5831077396869659, "loss": 0.45693217813968656, "time": 1.5506268739700317, "epoch": 692, "memory": 35394, "step": 216483}
{"lr": 7.456125607731673e-05, "data_time": 0.0021908760070800783, "grad_norm": 0.5603004068136215, "loss": 0.4548852264881134, "time": 1.2624903440475463, "epoch": 692, "memory": 35394, "step": 216583}
{"lr": 7.407163117649214e-05, "data_time": 0.40786170959472656, "grad_norm": 0.5584215372800827, "loss": 0.4497294545173645, "time": 1.5229271411895753, "epoch": 693, "memory": 35394, "step": 216696}
{"lr": 7.363961493373213e-05, "data_time": 0.002867770195007324, "grad_norm": 0.5378640621900559, "loss": 0.45555015802383425, "time": 1.498789381980896, "epoch": 693, "memory": 35394, "step": 216796}
{"lr": 7.3208801437066e-05, "data_time": 0.002856731414794922, "grad_norm": 0.5129430949687958, "loss": 0.4533175230026245, "time": 1.5198520421981812, "epoch": 693, "memory": 35394, "step": 216896}
{"lr": 7.272343052623806e-05, "data_time": 0.4914345979690552, "grad_norm": 0.5982498407363892, "loss": 0.4535418003797531, "time": 1.4941738843917847, "epoch": 694, "memory": 35394, "step": 217009}
{"lr": 7.229518125421286e-05, "data_time": 0.3242484092712402, "grad_norm": 0.584573045372963, "loss": 0.45951600968837736, "time": 1.5177598237991332, "epoch": 694, "memory": 35394, "step": 217109}
{"lr": 7.186813695446641e-05, "data_time": 0.0030931711196899416, "grad_norm": 0.5313993483781815, "loss": 0.4624104768037796, "time": 1.516062617301941, "epoch": 694, "memory": 35394, "step": 217209}
{"lr": 7.138702790742309e-05, "data_time": 0.37873687744140627, "grad_norm": 0.5568545520305633, "loss": 0.45995994806289675, "time": 1.5213543891906738, "epoch": 695, "memory": 35394, "step": 217322}
{"lr": 7.09625525532813e-05, "data_time": 0.0022063255310058594, "grad_norm": 0.5527456730604172, "loss": 0.4600161910057068, "time": 1.5771740436553956, "epoch": 695, "memory": 35394, "step": 217422}
{"lr": 7.053928437805548e-05, "data_time": 0.0032187938690185548, "grad_norm": 0.53965063393116, "loss": 0.4565676778554916, "time": 1.5802758932113647, "epoch": 695, "memory": 35394, "step": 217522}
{"lr": 7.006244499944724e-05, "data_time": 0.6218565225601196, "grad_norm": Infinity, "loss": 0.4581324726343155, "time": 1.551185417175293, "epoch": 696, "memory": 35394, "step": 217635}
{"lr": 6.964175044911624e-05, "data_time": 0.26150271892547605, "grad_norm": 0.5477576404809952, "loss": 0.45871880650520325, "time": 1.579701042175293, "epoch": 696, "memory": 35394, "step": 217735}
{"lr": 6.922226526475544e-05, "data_time": 0.003212857246398926, "grad_norm": 0.5832842081785202, "loss": 0.45910459458827974, "time": 1.573720121383667, "epoch": 696, "memory": 35394, "step": 217835}
{"lr": 6.874970328996984e-05, "data_time": 0.286489462852478, "grad_norm": 0.5409507483243943, "loss": 0.459547421336174, "time": 1.4367538213729858, "epoch": 697, "memory": 35394, "step": 217948}
{"lr": 6.833279636804399e-05, "data_time": 0.002306532859802246, "grad_norm": 0.5761056035757065, "loss": 0.4535706162452698, "time": 1.5506576538085937, "epoch": 697, "memory": 35394, "step": 218048}
{"lr": 6.791710097952377e-05, "data_time": 0.002251124382019043, "grad_norm": 0.5649667263031006, "loss": 0.46054313480854037, "time": 1.579276967048645, "epoch": 697, "memory": 35394, "step": 218148}
{"lr": 6.744882407455916e-05, "data_time": 0.14277224540710448, "grad_norm": 0.5677861571311951, "loss": 0.4537987053394318, "time": 1.5339370965957642, "epoch": 698, "memory": 35394, "step": 218261}
{"lr": 6.703571154418888e-05, "data_time": 0.0019439697265625, "grad_norm": 0.6124971807003021, "loss": 0.45060512125492097, "time": 1.5227384090423584, "epoch": 698, "memory": 35394, "step": 218361}
{"lr": 6.662381269500643e-05, "data_time": 0.002780580520629883, "grad_norm": 0.5754433870315552, "loss": 0.45763187408447265, "time": 1.5009173154830933, "epoch": 698, "memory": 35394, "step": 218461}
{"lr": 6.615982845634774e-05, "data_time": 0.5462795495986938, "grad_norm": 0.5656091451644898, "loss": 0.455308324098587, "time": 1.5322411060333252, "epoch": 699, "memory": 35394, "step": 218574}
{"lr": 6.57505170191303e-05, "data_time": 0.2706348419189453, "grad_norm": 0.5698564499616623, "loss": 0.45310413241386416, "time": 1.509150218963623, "epoch": 699, "memory": 35394, "step": 218674}
{"lr": 6.534242139119411e-05, "data_time": 0.0026666402816772463, "grad_norm": 0.595462754368782, "loss": 0.4596855819225311, "time": 1.4943341970443726, "epoch": 699, "memory": 35394, "step": 218774}
{"lr": 6.488273734569029e-05, "data_time": 0.5025430202484131, "grad_norm": 0.5473129332065583, "loss": 0.45954121053218844, "time": 1.529567337036133, "epoch": 700, "memory": 35394, "step": 218887}
{"lr": 6.447723364156047e-05, "data_time": 0.11841437816619874, "grad_norm": 0.5723539799451828, "loss": 0.45246249437332153, "time": 1.5105729341506957, "epoch": 700, "memory": 35394, "step": 218987}
{"lr": 6.407294785508239e-05, "data_time": 0.0028888702392578123, "grad_norm": 0.5992044001817703, "loss": 0.4587205082178116, "time": 1.4991544485092163, "epoch": 700, "memory": 35394, "step": 219087}
{"lr": 6.361757145982325e-05, "data_time": 0.5339600563049316, "grad_norm": 0.5461948633193969, "loss": 0.45630858540534974, "time": 1.538258719444275, "epoch": 701, "memory": 35394, "step": 219200}
{"lr": 6.321588206694653e-05, "data_time": 0.1854090452194214, "grad_norm": 0.538742282986641, "loss": 0.4639237284660339, "time": 1.4805978536605835, "epoch": 701, "memory": 35394, "step": 219300}
{"lr": 6.281541268033458e-05, "data_time": 0.002654433250427246, "grad_norm": 0.5560583025217056, "loss": 0.45715772211551664, "time": 1.5467002868652344, "epoch": 701, "memory": 35394, "step": 219400}
{"lr": 6.236435132253057e-05, "data_time": 0.22834346294403077, "grad_norm": 0.5625014960765838, "loss": 0.4544597566127777, "time": 1.5311184883117677, "epoch": 702, "memory": 35394, "step": 219513}
{"lr": 6.196648275719604e-05, "data_time": 0.0021106958389282226, "grad_norm": 0.5205055266618729, "loss": 0.4536254733800888, "time": 1.576956295967102, "epoch": 702, "memory": 35394, "step": 219613}
{"lr": 6.156983626694734e-05, "data_time": 0.0025600194931030273, "grad_norm": 0.5179480940103531, "loss": 0.4594214469194412, "time": 1.5585612535476685, "epoch": 702, "memory": 35394, "step": 219713}
{"lr": 6.112309726380927e-05, "data_time": 0.4106013298034668, "grad_norm": 0.5642590910196305, "loss": 0.4584375649690628, "time": 1.5405269384384155, "epoch": 703, "memory": 35394, "step": 219826}
{"lr": 6.072905598032298e-05, "data_time": 0.08554272651672364, "grad_norm": 0.5498950272798538, "loss": 0.45285109579563143, "time": 1.5195828676223755, "epoch": 703, "memory": 35394, "step": 219926}
{"lr": 6.033623882091949e-05, "data_time": 0.002634406089782715, "grad_norm": 0.5336647927761078, "loss": 0.4559327453374863, "time": 1.5340164184570313, "epoch": 703, "memory": 35394, "step": 220026}
{"lr": 5.989382941954034e-05, "data_time": 0.36972024440765383, "grad_norm": 0.5893033802509308, "loss": 0.459979447722435, "time": 1.5675640821456909, "epoch": 704, "memory": 35394, "step": 220139}
{"lr": 5.950362181012185e-05, "data_time": 0.05453002452850342, "grad_norm": 0.542397066950798, "loss": 0.4551346927881241, "time": 1.5183165311813354, "epoch": 704, "memory": 35394, "step": 220239}
{"lr": 5.911464035392522e-05, "data_time": 0.002297210693359375, "grad_norm": 0.5422476649284362, "loss": 0.4530853986740112, "time": 1.4955470323562623, "epoch": 704, "memory": 35394, "step": 220339}
{"lr": 5.867656773116204e-05, "data_time": 0.46508054733276366, "grad_norm": 0.5541498750448227, "loss": 0.454185226559639, "time": 1.5119826078414917, "epoch": 705, "memory": 35394, "step": 220452}
{"lr": 5.829020012583999e-05, "data_time": 0.10008955001831055, "grad_norm": 0.5366419225931167, "loss": 0.46602723300457, "time": 1.5163432359695435, "epoch": 705, "memory": 35394, "step": 220552}
{"lr": 5.790506068298809e-05, "data_time": 0.0033135175704956054, "grad_norm": 0.5427616238594055, "loss": 0.4549444705247879, "time": 1.4352983713150025, "epoch": 705, "memory": 35394, "step": 220652}
{"lr": 5.747133194534583e-05, "data_time": 0.13244261741638183, "grad_norm": 0.5224312454462051, "loss": 0.46228309273719786, "time": 1.561967635154724, "epoch": 706, "memory": 35394, "step": 220765}
{"lr": 5.7088810611855946e-05, "data_time": 0.002013397216796875, "grad_norm": 0.5481627613306046, "loss": 0.4595216929912567, "time": 1.52583167552948, "epoch": 706, "memory": 35394, "step": 220865}
{"lr": 5.670751943016059e-05, "data_time": 0.0030744075775146484, "grad_norm": 0.514602679014206, "loss": 0.45524724423885343, "time": 1.452300786972046, "epoch": 706, "memory": 35394, "step": 220965}
{"lr": 5.627814161367703e-05, "data_time": 0.4046666622161865, "grad_norm": 0.5987954318523407, "loss": 0.46333756744861604, "time": 1.558989405632019, "epoch": 707, "memory": 35394, "step": 221078}
{"lr": 5.589947275735945e-05, "data_time": 0.04399199485778808, "grad_norm": 0.5796533197164535, "loss": 0.4574493169784546, "time": 1.4963684558868409, "epoch": 707, "memory": 35394, "step": 221178}
{"lr": 5.552203602220536e-05, "data_time": 0.0025841474533081056, "grad_norm": 0.6111934065818787, "loss": 0.4632648915052414, "time": 1.5085460186004638, "epoch": 707, "memory": 35394, "step": 221278}
{"lr": 5.5097016092336364e-05, "data_time": 0.19243111610412597, "grad_norm": 0.591383022069931, "loss": 0.45395144522190095, "time": 1.50657377243042, "epoch": 708, "memory": 35394, "step": 221391}
{"lr": 5.472220585603616e-05, "data_time": 0.0022161245346069337, "grad_norm": 0.5365155547857284, "loss": 0.45785694420337675, "time": 1.56109561920166, "epoch": 708, "memory": 35394, "step": 221491}
{"lr": 5.4348629690280565e-05, "data_time": 0.0027573347091674806, "grad_norm": 0.5091679811477661, "loss": 0.46259700655937197, "time": 1.5593382358551025, "epoch": 708, "memory": 35394, "step": 221591}
{"lr": 5.392797454178778e-05, "data_time": 0.5305006504058838, "grad_norm": 0.5368288159370422, "loss": 0.45794416069984434, "time": 1.5100059509277344, "epoch": 709, "memory": 35394, "step": 221704}
{"lr": 5.35570290057542e-05, "data_time": 0.15957319736480713, "grad_norm": 0.5391562670469284, "loss": 0.44954555928707124, "time": 1.4824208974838258, "epoch": 709, "memory": 35394, "step": 221804}
{"lr": 5.3187319469626796e-05, "data_time": 0.0027053356170654297, "grad_norm": 0.5565433144569397, "loss": 0.45776606202125547, "time": 1.480976152420044, "epoch": 709, "memory": 35394, "step": 221904}
{"lr": 5.2771035926465206e-05, "data_time": 0.19032182693481445, "grad_norm": 0.5141705721616745, "loss": 0.45993857085704803, "time": 1.2676919221878051, "epoch": 710, "memory": 35394, "step": 222017}
{"lr": 5.24039611082539e-05, "data_time": 0.0022276163101196287, "grad_norm": 0.5041163235902786, "loss": 0.4606097936630249, "time": 1.5437156677246093, "epoch": 710, "memory": 35394, "step": 222117}
{"lr": 5.2038124199259776e-05, "data_time": 0.0021927356719970703, "grad_norm": 0.5256363332271576, "loss": 0.45536368787288667, "time": 1.6387242794036865, "epoch": 710, "memory": 35394, "step": 222217}
{"lr": 5.1626219014467454e-05, "data_time": 0.6512239933013916, "grad_norm": 0.5734946459531785, "loss": 0.4570034682750702, "time": 1.8140399217605592, "epoch": 711, "memory": 35394, "step": 222330}
{"lr": 5.126302086884231e-05, "data_time": 0.04292073249816895, "grad_norm": 0.4937557131052017, "loss": 0.4540249824523926, "time": 1.5672348499298097, "epoch": 711, "memory": 35394, "step": 222430}
{"lr": 5.090106252166352e-05, "data_time": 0.003088092803955078, "grad_norm": 0.5764116406440735, "loss": 0.4611122369766235, "time": 1.495295476913452, "epoch": 711, "memory": 35394, "step": 222530}
{"lr": 5.049354237725191e-05, "data_time": 0.4895296096801758, "grad_norm": 0.4871887505054474, "loss": 0.4492084503173828, "time": 1.539576029777527, "epoch": 712, "memory": 35394, "step": 222643}
{"lr": 5.013422679608853e-05, "data_time": 0.0020383596420288086, "grad_norm": 0.528139379620552, "loss": 0.4565781056880951, "time": 1.3797374486923217, "epoch": 712, "memory": 35394, "step": 222743}
{"lr": 4.9776152882488725e-05, "data_time": 0.002638411521911621, "grad_norm": 0.523543831706047, "loss": 0.45789754986763, "time": 1.5309797525405884, "epoch": 712, "memory": 35394, "step": 222843}
{"lr": 4.937302438933399e-05, "data_time": 0.25575253963470457, "grad_norm": Infinity, "loss": 0.45531654059886933, "time": 1.5412139654159547, "epoch": 713, "memory": 35394, "step": 222956}
{"lr": 4.901759720152422e-05, "data_time": 0.0026779413223266602, "grad_norm": 0.5096626698970794, "loss": 0.45701771080493925, "time": 1.5463239669799804, "epoch": 713, "memory": 35394, "step": 223056}
{"lr": 4.866341353025282e-05, "data_time": 0.0035072088241577147, "grad_norm": 0.4981076717376709, "loss": 0.45375974476337433, "time": 1.5163381338119506, "epoch": 713, "memory": 35394, "step": 223156}
{"lr": 4.8264683227989264e-05, "data_time": 0.002271556854248047, "grad_norm": 0.4885018438100815, "loss": 0.4556243658065796, "time": 1.5936621189117433, "epoch": 714, "memory": 35394, "step": 223269}
{"lr": 4.791315019934652e-05, "data_time": 0.0020493030548095702, "grad_norm": 0.5568123698234558, "loss": 0.4611264705657959, "time": 1.5605576038360596, "epoch": 714, "memory": 35394, "step": 223369}
{"lr": 4.756286251604441e-05, "data_time": 0.003084278106689453, "grad_norm": 0.5097839891910553, "loss": 0.4494420289993286, "time": 1.5771882772445678, "epoch": 714, "memory": 35394, "step": 223469}
{"lr": 4.7168536872957616e-05, "data_time": 0.3046875476837158, "grad_norm": 0.5678255975246429, "loss": 0.45392942428588867, "time": 1.5820075750350953, "epoch": 715, "memory": 35394, "step": 223582}
{"lr": 4.682090370612392e-05, "data_time": 0.002375936508178711, "grad_norm": 0.5984544157981873, "loss": 0.4532764405012131, "time": 1.5238636493682862, "epoch": 715, "memory": 35394, "step": 223682}
{"lr": 4.647451769323063e-05, "data_time": 0.0026926755905151366, "grad_norm": 0.5494679838418961, "loss": 0.4560258537530899, "time": 1.56510648727417, "epoch": 715, "memory": 35394, "step": 223782}
{"lr": 4.608460310615302e-05, "data_time": 0.38190159797668455, "grad_norm": 0.5327255249023437, "loss": 0.45668687522411344, "time": 1.562274408340454, "epoch": 716, "memory": 35394, "step": 223895}
{"lr": 4.57408754405059e-05, "data_time": 0.002072310447692871, "grad_norm": 0.5588442206382751, "loss": 0.45631595253944396, "time": 1.5198840141296386, "epoch": 716, "memory": 35394, "step": 223995}
{"lr": 4.5398396717166904e-05, "data_time": 0.002797698974609375, "grad_norm": 0.5369131118059158, "loss": 0.4582517147064209, "time": 1.2459093809127808, "epoch": 716, "memory": 35394, "step": 224095}
{"lr": 4.5012899511373836e-05, "data_time": 0.41409478187561033, "grad_norm": 0.49979824423789976, "loss": 0.45456331968307495, "time": 1.505222773551941, "epoch": 717, "memory": 35394, "step": 224208}
{"lr": 4.467308292293462e-05, "data_time": 0.14975054264068605, "grad_norm": 0.5409417629241944, "loss": 0.45680340826511384, "time": 1.5085799932479858, "epoch": 717, "memory": 35394, "step": 224308}
{"lr": 4.4334517044911015e-05, "data_time": 0.0024390935897827147, "grad_norm": 0.5386477112770081, "loss": 0.45152573883533476, "time": 1.5134351253509521, "epoch": 717, "memory": 35394, "step": 224408}
{"lr": 4.3953443474018424e-05, "data_time": 0.3247572422027588, "grad_norm": 0.49921731650829315, "loss": 0.45432551205158234, "time": 1.5236164808273316, "epoch": 718, "memory": 35394, "step": 224521}
{"lr": 4.3617543475362294e-05, "data_time": 0.002016448974609375, "grad_norm": 0.543315789103508, "loss": 0.4561192750930786, "time": 1.6157385110855103, "epoch": 718, "memory": 35394, "step": 224621}
{"lr": 4.328289593494021e-05, "data_time": 0.0023194551467895508, "grad_norm": 0.5321103453636169, "loss": 0.4586012840270996, "time": 1.5651135206222535, "epoch": 718, "memory": 35394, "step": 224721}
{"lr": 4.290625218080217e-05, "data_time": 0.4052824258804321, "grad_norm": 0.5688494473695755, "loss": 0.45806593298912046, "time": 1.5525596141815186, "epoch": 719, "memory": 35394, "step": 224834}
{"lr": 4.2574274220968635e-05, "data_time": 0.0026627302169799803, "grad_norm": 0.5056760609149933, "loss": 0.4534659504890442, "time": 1.530475902557373, "epoch": 719, "memory": 35394, "step": 224934}
{"lr": 4.224355044686993e-05, "data_time": 0.0029274702072143556, "grad_norm": 0.49369091391563413, "loss": 0.4569371521472931, "time": 1.5162411689758302, "epoch": 719, "memory": 35394, "step": 225034}
{"lr": 4.1871342619479763e-05, "data_time": 0.04770858287811279, "grad_norm": 0.5019683808088302, "loss": 0.46049195528030396, "time": 1.555790090560913, "epoch": 720, "memory": 35394, "step": 225147}
{"lr": 4.154329208388437e-05, "data_time": 0.0026836633682250977, "grad_norm": 0.5001223891973495, "loss": 0.45516360402107237, "time": 1.5481104135513306, "epoch": 720, "memory": 35394, "step": 225247}
{"lr": 4.1216497441178445e-05, "data_time": 0.0022833824157714845, "grad_norm": 0.5332807302474976, "loss": 0.45803314745426177, "time": 1.5573695421218872, "epoch": 720, "memory": 35394, "step": 225347}
{"lr": 4.084873157856887e-05, "data_time": 0.03179194927215576, "grad_norm": 0.5572584003210068, "loss": 0.4586448848247528, "time": 1.2587498664855956, "epoch": 721, "memory": 35394, "step": 225460}
{"lr": 4.052461378891527e-05, "data_time": 0.0022707700729370115, "grad_norm": 0.4944674760103226, "loss": 0.45955754816532135, "time": 1.1866408348083497, "epoch": 721, "memory": 35394, "step": 225560}
{"lr": 4.020175357893284e-05, "data_time": 0.0028867244720458983, "grad_norm": 0.5367462933063507, "loss": 0.45317163169384, "time": 1.2212537050247192, "epoch": 721, "memory": 35394, "step": 225660}
{"lr": 3.983843564707793e-05, "data_time": 0.002303004264831543, "grad_norm": 0.5380952715873718, "loss": 0.4545513868331909, "time": 1.220337200164795, "epoch": 722, "memory": 35394, "step": 225773}
{"lr": 3.951825586127218e-05, "data_time": 0.0020340681076049805, "grad_norm": 0.5091318011283874, "loss": 0.4567444145679474, "time": 1.238345170021057, "epoch": 722, "memory": 35394, "step": 225873}
{"lr": 3.919933532151859e-05, "data_time": 0.002195143699645996, "grad_norm": 0.4998218476772308, "loss": 0.45918282568454744, "time": 1.2217140674591065, "epoch": 722, "memory": 35394, "step": 225973}
{"lr": 3.884047121423734e-05, "data_time": 0.0019845008850097657, "grad_norm": 0.5352770507335662, "loss": 0.4614618867635727, "time": 1.2723587512969972, "epoch": 723, "memory": 35394, "step": 226086}
{"lr": 3.8524234626302306e-05, "data_time": 0.0020815134048461914, "grad_norm": 0.5116759568452836, "loss": 0.4542040079832077, "time": 1.2236539840698242, "epoch": 723, "memory": 35394, "step": 226186}
{"lr": 3.8209258930372673e-05, "data_time": 0.0027936697006225586, "grad_norm": 0.5205170422792434, "loss": 0.45348210632801056, "time": 1.2286622047424316, "epoch": 723, "memory": 35394, "step": 226286}
{"lr": 3.785485446923302e-05, "data_time": 0.0022462129592895506, "grad_norm": 0.45331872403621676, "loss": 0.45867995619773866, "time": 1.2216429233551025, "epoch": 724, "memory": 35394, "step": 226399}
{"lr": 3.7542566209224425e-05, "data_time": 0.002256298065185547, "grad_norm": 0.4668492257595062, "loss": 0.4588841497898102, "time": 1.2203102111816406, "epoch": 724, "memory": 35394, "step": 226499}
{"lr": 3.723154046671973e-05, "data_time": 0.0025942325592041016, "grad_norm": 0.5336043685674667, "loss": 0.45257320404052737, "time": 1.1931265115737915, "epoch": 724, "memory": 35394, "step": 226599}
{"lr": 3.688160140094447e-05, "data_time": 0.26191163063049316, "grad_norm": 0.5057969689369202, "loss": 0.4592909187078476, "time": 1.1992800951004028, "epoch": 725, "memory": 35394, "step": 226712}
{"lr": 3.6573266534867314e-05, "data_time": 0.0023410558700561524, "grad_norm": 0.49498191475868225, "loss": 0.4556594640016556, "time": 1.2387784957885741, "epoch": 725, "memory": 35394, "step": 226812}
{"lr": 3.626619579131173e-05, "data_time": 0.003068995475769043, "grad_norm": 0.553495568037033, "loss": 0.4527524173259735, "time": 1.222077441215515, "epoch": 725, "memory": 35394, "step": 226912}
{"lr": 3.5920727797684805e-05, "data_time": 0.13628127574920654, "grad_norm": 0.47072103023529055, "loss": 0.46123590171337125, "time": 1.216002058982849, "epoch": 726, "memory": 35394, "step": 227025}
{"lr": 3.5616351327411275e-05, "data_time": 0.0022928714752197266, "grad_norm": 0.5080169647932052, "loss": 0.4512509912252426, "time": 1.2167171478271483, "epoch": 726, "memory": 35394, "step": 227125}
{"lr": 3.531324056417043e-05, "data_time": 0.0021839380264282227, "grad_norm": 0.5564034402370452, "loss": 0.4588650822639465, "time": 1.2062527179718017, "epoch": 726, "memory": 35394, "step": 227225}
{"lr": 3.4972249246945365e-05, "data_time": 0.14162733554840087, "grad_norm": 0.49377517998218534, "loss": 0.4574218451976776, "time": 1.2431634187698364, "epoch": 727, "memory": 35394, "step": 227338}
{"lr": 3.467183611013358e-05, "data_time": 0.002260923385620117, "grad_norm": 0.5693681359291076, "loss": 0.46180957853794097, "time": 1.2087029218673706, "epoch": 727, "memory": 35394, "step": 227438}
{"lr": 3.437269024433303e-05, "data_time": 0.002327251434326172, "grad_norm": 0.5167959660291672, "loss": 0.4524044066667557, "time": 1.1945968866348267, "epoch": 727, "memory": 35394, "step": 227538}
{"lr": 3.403618113514199e-05, "data_time": 0.030752801895141603, "grad_norm": 0.5310011088848114, "loss": 0.4572610408067703, "time": 1.241688847541809, "epoch": 728, "memory": 35394, "step": 227651}
{"lr": 3.3739736205156e-05, "data_time": 0.0021556854248046876, "grad_norm": 0.5495516419410705, "loss": 0.4603972971439362, "time": 1.1804394006729126, "epoch": 728, "memory": 35394, "step": 227751}
{"lr": 3.3444560089602004e-05, "data_time": 0.002882838249206543, "grad_norm": 0.4816761016845703, "loss": 0.4579925090074539, "time": 1.24178466796875, "epoch": 728, "memory": 35394, "step": 227851}
{"lr": 3.311253864736538e-05, "data_time": 0.08129746913909912, "grad_norm": 0.5349743962287903, "loss": 0.4567969232797623, "time": 1.1862721920013428, "epoch": 729, "memory": 35394, "step": 227964}
{"lr": 3.2820066733196654e-05, "data_time": 0.0020899295806884764, "grad_norm": 0.5028551459312439, "loss": 0.457634711265564, "time": 1.2692838668823243, "epoch": 729, "memory": 35394, "step": 228064}
{"lr": 3.252886515629747e-05, "data_time": 0.00284724235534668, "grad_norm": 0.5320564806461334, "loss": 0.453906187415123, "time": 1.2343107223510743, "epoch": 729, "memory": 35394, "step": 228164}
{"lr": 3.220133676713609e-05, "data_time": 0.20800743103027344, "grad_norm": 0.5647247612476349, "loss": 0.46100605130195615, "time": 1.2310749769210816, "epoch": 730, "memory": 35394, "step": 228277}
{"lr": 3.191284261332479e-05, "data_time": 0.0027318477630615236, "grad_norm": 0.47700701355934144, "loss": 0.4545932561159134, "time": 1.2182065725326539, "epoch": 730, "memory": 35394, "step": 228377}
{"lr": 3.162562029901264e-05, "data_time": 0.0022757768630981444, "grad_norm": 0.491078245639801, "loss": 0.45230538547039034, "time": 1.1670363903045655, "epoch": 730, "memory": 35394, "step": 228477}
{"lr": 3.130259027615989e-05, "data_time": 0.002175259590148926, "grad_norm": 0.5088062584400177, "loss": 0.45437271893024445, "time": 1.283573365211487, "epoch": 731, "memory": 35394, "step": 228590}
{"lr": 3.101807856271795e-05, "data_time": 0.0020197153091430662, "grad_norm": 0.5179643273353577, "loss": 0.4527447998523712, "time": 1.2196152687072754, "epoch": 731, "memory": 35394, "step": 228690}
{"lr": 3.073484017037301e-05, "data_time": 0.0024310827255249025, "grad_norm": 0.5301176846027374, "loss": 0.45252170264720915, "time": 1.1766754388809204, "epoch": 731, "memory": 35394, "step": 228790}
{"lr": 3.0416313754088982e-05, "data_time": 0.08553140163421631, "grad_norm": 0.4883792906999588, "loss": 0.4570275664329529, "time": 1.2436346530914306, "epoch": 732, "memory": 35394, "step": 228903}
{"lr": 3.0135789096424415e-05, "data_time": 0.0025870800018310547, "grad_norm": 0.497970899939537, "loss": 0.4580935925245285, "time": 1.2192219018936157, "epoch": 732, "memory": 35394, "step": 229003}
{"lr": 2.9856539220798533e-05, "data_time": 0.003062796592712402, "grad_norm": 0.4776893645524979, "loss": 0.4593217670917511, "time": 1.235948872566223, "epoch": 732, "memory": 35394, "step": 229103}
{"lr": 2.9542521578285058e-05, "data_time": 0.022487854957580565, "grad_norm": 0.5221052795648575, "loss": 0.4593021124601364, "time": 1.2800060033798217, "epoch": 733, "memory": 35394, "step": 229216}
{"lr": 2.9265988527127093e-05, "data_time": 0.0019956111907958986, "grad_norm": 0.4706000298261642, "loss": 0.458283519744873, "time": 1.2357422590255738, "epoch": 733, "memory": 35394, "step": 229316}
{"lr": 2.8990731698269925e-05, "data_time": 0.0028409719467163085, "grad_norm": 0.49544491767883303, "loss": 0.45482473969459536, "time": 1.2120206117630006, "epoch": 733, "memory": 35394, "step": 229416}
{"lr": 2.8681227923586104e-05, "data_time": 0.011756682395935058, "grad_norm": 0.5351995527744293, "loss": 0.4555841386318207, "time": 1.1913207530975343, "epoch": 734, "memory": 35394, "step": 229529}
{"lr": 2.8408690964911338e-05, "data_time": 0.0027702808380126952, "grad_norm": 0.5009107440710068, "loss": 0.4602238118648529, "time": 1.1914399147033692, "epoch": 734, "memory": 35394, "step": 229629}
{"lr": 2.8137431648096073e-05, "data_time": 0.003385329246520996, "grad_norm": 0.49575802087783816, "loss": 0.4628932446241379, "time": 1.159415078163147, "epoch": 734, "memory": 35394, "step": 229729}
{"lr": 2.7832446762076563e-05, "data_time": 0.0020535945892333984, "grad_norm": 0.513840064406395, "loss": 0.4613390415906906, "time": 1.2194476842880249, "epoch": 735, "memory": 35394, "step": 229842}
{"lr": 2.7563910317035938e-05, "data_time": 0.002176070213317871, "grad_norm": 0.49759999811649325, "loss": 0.45749823153018954, "time": 1.1597704648971559, "epoch": 735, "memory": 35394, "step": 229942}
{"lr": 2.7296652912687627e-05, "data_time": 0.0030076980590820314, "grad_norm": 0.47133580446243284, "loss": 0.45766826868057253, "time": 1.1735164880752564, "epoch": 735, "memory": 35394, "step": 230042}
{"lr": 2.6996191862860435e-05, "data_time": 0.002611541748046875, "grad_norm": 0.515514811873436, "loss": 0.4624167770147324, "time": 1.2358676433563232, "epoch": 736, "memory": 35394, "step": 230155}
{"lr": 2.6731660287708e-05, "data_time": 0.0029412031173706053, "grad_norm": 0.5311740696430206, "loss": 0.4543749362230301, "time": 1.1822577476501466, "epoch": 736, "memory": 35394, "step": 230255}
{"lr": 2.64684091313318e-05, "data_time": 0.0028881072998046876, "grad_norm": 0.5190307527780533, "loss": 0.45301136672496795, "time": 1.1503897666931153, "epoch": 736, "memory": 35394, "step": 230355}
{"lr": 2.6172476791838463e-05, "data_time": 0.0020620107650756838, "grad_norm": 0.5174270361661911, "loss": 0.45595809519290925, "time": 1.2078063488006592, "epoch": 737, "memory": 35394, "step": 230468}
{"lr": 2.591195437786031e-05, "data_time": 0.0022440433502197267, "grad_norm": 0.4996886610984802, "loss": 0.4593434989452362, "time": 1.188593339920044, "epoch": 737, "memory": 35394, "step": 230568}
{"lr": 2.5652713739970817e-05, "data_time": 0.0024565696716308595, "grad_norm": 0.49406052231788633, "loss": 0.4589416027069092, "time": 1.1741783618927002, "epoch": 737, "memory": 35394, "step": 230668}
{"lr": 2.53613149114871e-05, "data_time": 0.1797710657119751, "grad_norm": 0.49159803092479704, "loss": 0.4550823450088501, "time": 1.2465140581130982, "epoch": 738, "memory": 35394, "step": 230781}
{"lr": 2.5104805884932014e-05, "data_time": 0.002189350128173828, "grad_norm": 0.5387861162424088, "loss": 0.4596723049879074, "time": 1.2398516178131103, "epoch": 738, "memory": 35394, "step": 230881}
{"lr": 2.4849579970984814e-05, "data_time": 0.0023797273635864256, "grad_norm": 0.5040254920721055, "loss": 0.4650684386491776, "time": 1.2143182277679443, "epoch": 738, "memory": 35394, "step": 230981}
{"lr": 2.4562719380643077e-05, "data_time": 0.07187745571136475, "grad_norm": 0.49862401783466337, "loss": 0.45490625500679016, "time": 1.2658910274505615, "epoch": 739, "memory": 35394, "step": 231094}
{"lr": 2.4310227902653708e-05, "data_time": 0.002372932434082031, "grad_norm": 0.4930765926837921, "loss": 0.459613898396492, "time": 1.1759369611740111, "epoch": 739, "memory": 35394, "step": 231194}
{"lr": 2.4059020852976482e-05, "data_time": 0.0028850555419921873, "grad_norm": 0.5088566780090332, "loss": 0.45695045590400696, "time": 1.1266271829605103, "epoch": 739, "memory": 35394, "step": 231294}
{"lr": 2.377670315428871e-05, "data_time": 0.09471101760864258, "grad_norm": 0.48961214125156405, "loss": 0.45202162861824036, "time": 1.2292840003967285, "epoch": 740, "memory": 35394, "step": 231407}
{"lr": 2.3528233320834078e-05, "data_time": 0.002410221099853516, "grad_norm": 0.46810998022556305, "loss": 0.45560001134872435, "time": 1.2720106601715089, "epoch": 740, "memory": 35394, "step": 231507}
{"lr": 2.32810492105599e-05, "data_time": 0.0024301767349243163, "grad_norm": 0.48432927429676054, "loss": 0.4554151356220245, "time": 1.2273824214935303, "epoch": 740, "memory": 35394, "step": 231607}
{"lr": 2.3003278983342263e-05, "data_time": 0.001989865303039551, "grad_norm": 0.4924259513616562, "loss": 0.4539645344018936, "time": 1.2410689830780028, "epoch": 741, "memory": 35394, "step": 231720}
{"lr": 2.275883482515162e-05, "data_time": 0.0024083614349365234, "grad_norm": 0.48730342388153075, "loss": 0.4555593699216843, "time": 1.1858694791793822, "epoch": 741, "memory": 35394, "step": 231820}
{"lr": 2.2515677664152383e-05, "data_time": 0.002436184883117676, "grad_norm": 0.4631304830312729, "loss": 0.4547895520925522, "time": 1.220659065246582, "epoch": 741, "memory": 35394, "step": 231920}
{"lr": 2.224245941445145e-05, "data_time": 0.19065141677856445, "grad_norm": 0.5026474535465241, "loss": 0.45688203871250155, "time": 1.1987359762191772, "epoch": 742, "memory": 35394, "step": 232033}
{"lr": 2.2002044896948495e-05, "data_time": 0.0023236989974975584, "grad_norm": 0.5091525703668595, "loss": 0.4557323962450027, "time": 1.262436580657959, "epoch": 742, "memory": 35394, "step": 232133}
{"lr": 2.176291862977003e-05, "data_time": 0.0031154632568359377, "grad_norm": 0.517581582069397, "loss": 0.4523158103227615, "time": 1.2267873525619506, "epoch": 742, "memory": 35394, "step": 232233}
{"lr": 2.149425678978876e-05, "data_time": 0.020188117027282716, "grad_norm": 0.5425441056489945, "loss": 0.452598312497139, "time": 1.2162896156311036, "epoch": 743, "memory": 35394, "step": 232346}
{"lr": 2.125787581302793e-05, "data_time": 0.0021115541458129883, "grad_norm": 0.4950500547885895, "loss": 0.4577938824892044, "time": 1.2060125350952149, "epoch": 743, "memory": 35394, "step": 232446}
{"lr": 2.102278431882613e-05, "data_time": 0.0024753093719482424, "grad_norm": 0.4883805990219116, "loss": 0.45627800524234774, "time": 1.192685890197754, "epoch": 743, "memory": 35394, "step": 232546}
{"lr": 2.075868324685258e-05, "data_time": 0.0020418882369995115, "grad_norm": 0.5218871891498565, "loss": 0.4567844122648239, "time": 1.3523946285247803, "epoch": 744, "memory": 35394, "step": 232659}
{"lr": 2.0526339645455207e-05, "data_time": 0.0019941091537475585, "grad_norm": 0.5386616617441178, "loss": 0.4626105934381485, "time": 1.0929715633392334, "epoch": 744, "memory": 35394, "step": 232759}
{"lr": 2.0295286737933054e-05, "data_time": 0.0025949954986572267, "grad_norm": 0.4553263157606125, "loss": 0.4591415494680405, "time": 1.2095853567123414, "epoch": 744, "memory": 35394, "step": 232859}
{"lr": 2.0035750718269698e-05, "data_time": 0.0021071434020996094, "grad_norm": 0.4976614236831665, "loss": 0.4523118108510971, "time": 1.3063188314437866, "epoch": 745, "memory": 35394, "step": 232972}
{"lr": 1.980744826136166e-05, "data_time": 0.0021076440811157227, "grad_norm": 0.49421408772468567, "loss": 0.4590300142765045, "time": 1.2275598287582397, "epoch": 745, "memory": 35394, "step": 233072}
{"lr": 1.958043768870763e-05, "data_time": 0.0023838281631469727, "grad_norm": 0.48890824615955353, "loss": 0.4609225630760193, "time": 1.266980767250061, "epoch": 745, "memory": 35394, "step": 233172}
{"lr": 1.932547093160166e-05, "data_time": 0.1948172092437744, "grad_norm": 0.5079566091299057, "loss": 0.45135039687156675, "time": 1.2119190454483033, "epoch": 746, "memory": 35394, "step": 233285}
{"lr": 1.910121332275279e-05, "data_time": 0.002633976936340332, "grad_norm": 0.5009071439504623, "loss": 0.4604244023561478, "time": 1.1950423002243042, "epoch": 746, "memory": 35394, "step": 233385}
{"lr": 1.8878248767579278e-05, "data_time": 0.00308842658996582, "grad_norm": 0.4820772707462311, "loss": 0.45523586869239807, "time": 1.2514566421508788, "epoch": 746, "memory": 35394, "step": 233485}
{"lr": 1.8627855409154402e-05, "data_time": 0.0023125171661376952, "grad_norm": 0.4666625291109085, "loss": 0.4571136862039566, "time": 1.2294832706451415, "epoch": 747, "memory": 35394, "step": 233598}
{"lr": 1.8407646286318304e-05, "data_time": 0.002132225036621094, "grad_norm": 0.48705661594867705, "loss": 0.45249633193016053, "time": 1.6260590076446533, "epoch": 747, "memory": 35394, "step": 233698}
{"lr": 1.8188731365602695e-05, "data_time": 0.0025305747985839844, "grad_norm": 0.5007063388824463, "loss": 0.45892607867717744, "time": 1.1955593824386597, "epoch": 747, "memory": 35394, "step": 233798}
{"lr": 1.7942915467791755e-05, "data_time": 0.10117096900939941, "grad_norm": 0.4685743242502213, "loss": 0.45564886927604675, "time": 1.2598325729370117, "epoch": 748, "memory": 35394, "step": 233911}
{"lr": 1.772675840324656e-05, "data_time": 0.0022591590881347657, "grad_norm": 0.4949582576751709, "loss": 0.45524092614650724, "time": 1.1811591625213622, "epoch": 748, "memory": 35394, "step": 234011}
{"lr": 1.7511896668272004e-05, "data_time": 0.0031427860260009764, "grad_norm": 0.48526577055454256, "loss": 0.4513464212417603, "time": 1.2872654676437378, "epoch": 748, "memory": 35394, "step": 234111}
{"lr": 1.7270662218751663e-05, "data_time": 0.0026527881622314454, "grad_norm": 0.47747116088867186, "loss": 0.45916723012924193, "time": 1.2153661489486693, "epoch": 749, "memory": 35394, "step": 234224}
{"lr": 1.7058560719041902e-05, "data_time": 0.0023294925689697266, "grad_norm": 0.4841349691152573, "loss": 0.4574483335018158, "time": 1.1866240739822387, "epoch": 749, "memory": 35394, "step": 234324}
{"lr": 1.6847755655340147e-05, "data_time": 0.0029967308044433595, "grad_norm": 0.48783566653728483, "loss": 0.4555568277835846, "time": 1.225005578994751, "epoch": 749, "memory": 35394, "step": 234424}
{"lr": 1.661110656746568e-05, "data_time": 0.002352309226989746, "grad_norm": 0.5001368939876556, "loss": 0.4561796247959137, "time": 1.2382351160049438, "epoch": 750, "memory": 35394, "step": 234537}
{"lr": 1.6403064073346172e-05, "data_time": 0.0029647111892700194, "grad_norm": 0.49217348694801333, "loss": 0.45309601724147797, "time": 1.2486275911331177, "epoch": 750, "memory": 35394, "step": 234637}
{"lr": 1.6196319100640695e-05, "data_time": 0.003520822525024414, "grad_norm": 0.48343080580234526, "loss": 0.45414180755615235, "time": 1.2319196462631226, "epoch": 750, "memory": 35394, "step": 234737}
{"lr": 1.5964259213382704e-05, "data_time": 0.002765035629272461, "grad_norm": 0.48316024243831635, "loss": 0.4579133152961731, "time": 1.2478445529937745, "epoch": 751, "memory": 35394, "step": 234850}
{"lr": 1.576027909976176e-05, "data_time": 0.002225518226623535, "grad_norm": 0.49366585314273836, "loss": 0.45674494206905364, "time": 1.2660625219345092, "epoch": 751, "memory": 35394, "step": 234950}
{"lr": 1.5557597571912424e-05, "data_time": 0.0023050546646118165, "grad_norm": 0.4876679092645645, "loss": 0.4614153802394867, "time": 1.2194528579711914, "epoch": 751, "memory": 35394, "step": 235050}
{"lr": 1.5330130649794474e-05, "data_time": 0.0022267818450927733, "grad_norm": 0.5213934123516083, "loss": 0.4586344301700592, "time": 1.2503597021102906, "epoch": 752, "memory": 35394, "step": 235163}
{"lr": 1.513021622567992e-05, "data_time": 0.0027076244354248048, "grad_norm": 0.4926042199134827, "loss": 0.46053455770015717, "time": 1.281997561454773, "epoch": 752, "memory": 35394, "step": 235263}
{"lr": 1.4931601430628563e-05, "data_time": 0.00246584415435791, "grad_norm": 0.5077041119337082, "loss": 0.4603695750236511, "time": 1.2946646213531494, "epoch": 752, "memory": 35394, "step": 235363}
{"lr": 1.470873116366632e-05, "data_time": 0.002002406120300293, "grad_norm": 0.46317767798900605, "loss": 0.4594501882791519, "time": 1.2092432737350465, "epoch": 753, "memory": 35394, "step": 235476}
{"lr": 1.4512885672111521e-05, "data_time": 0.0025536060333251954, "grad_norm": 0.47316637337207795, "loss": 0.4616743177175522, "time": 1.2441777229309081, "epoch": 753, "memory": 35394, "step": 235576}
{"lr": 1.4318340831828305e-05, "data_time": 0.0022228002548217774, "grad_norm": 0.44374169409275055, "loss": 0.45853114426136016, "time": 1.2166420936584472, "epoch": 753, "memory": 35394, "step": 235676}
{"lr": 1.4100070835469625e-05, "data_time": 0.002216529846191406, "grad_norm": 0.45825751423835753, "loss": 0.4511731445789337, "time": 1.2125354290008545, "epoch": 754, "memory": 35394, "step": 235789}
{"lr": 1.3908297453520617e-05, "data_time": 0.0021306276321411133, "grad_norm": 0.4825650125741959, "loss": 0.46402991712093355, "time": 1.2638232946395873, "epoch": 754, "memory": 35394, "step": 235889}
{"lr": 1.3717825723952688e-05, "data_time": 0.002347850799560547, "grad_norm": 0.4882447302341461, "loss": 0.457695797085762, "time": 1.159736728668213, "epoch": 754, "memory": 35394, "step": 235989}
{"lr": 1.3504159539018545e-05, "data_time": 0.07969961166381836, "grad_norm": 0.4687981367111206, "loss": 0.4571630358695984, "time": 1.202296209335327, "epoch": 755, "memory": 35394, "step": 236102}
{"lr": 1.3316461377662972e-05, "data_time": 0.002645134925842285, "grad_norm": 0.4809169560670853, "loss": 0.4469866633415222, "time": 1.2380218505859375, "epoch": 755, "memory": 35394, "step": 236202}
{"lr": 1.3130065848682158e-05, "data_time": 0.002426600456237793, "grad_norm": 0.5105459809303283, "loss": 0.45173138976097105, "time": 1.2185352325439454, "epoch": 755, "memory": 35394, "step": 236302}
{"lr": 1.292100694130986e-05, "data_time": 0.0020592212677001953, "grad_norm": 0.48466735780239106, "loss": 0.45249004662036896, "time": 1.2909296751022339, "epoch": 756, "memory": 35394, "step": 236415}
{"lr": 1.273738704542619e-05, "data_time": 0.002145862579345703, "grad_norm": 0.46809524595737456, "loss": 0.4598705291748047, "time": 1.1801633358001709, "epoch": 756, "memory": 35394, "step": 236515}
{"lr": 1.2555070740779426e-05, "data_time": 0.0023132085800170897, "grad_norm": 0.5032975256443024, "loss": 0.44530673027038575, "time": 1.3451532125473022, "epoch": 756, "memory": 35394, "step": 236615}
{"lr": 1.2350622502365942e-05, "data_time": 0.0022490262985229493, "grad_norm": 0.44360176026821135, "loss": 0.4547955125570297, "time": 1.235062265396118, "epoch": 757, "memory": 35394, "step": 236728}
{"lr": 1.2171083850674286e-05, "data_time": 0.0021051883697509764, "grad_norm": 0.4620935320854187, "loss": 0.4537178009748459, "time": 1.1865893602371216, "epoch": 757, "memory": 35394, "step": 236828}
{"lr": 1.1992849727934428e-05, "data_time": 0.0031885862350463866, "grad_norm": 0.4766137361526489, "loss": 0.46263388991355897, "time": 1.200262188911438, "epoch": 757, "memory": 35394, "step": 236928}
{"lr": 1.1793015475081496e-05, "data_time": 0.10528593063354492, "grad_norm": 0.45070370733737947, "loss": 0.4584424525499344, "time": 1.2546849727630616, "epoch": 758, "memory": 35394, "step": 237041}
{"lr": 1.1617560980095209e-05, "data_time": 0.0023764848709106447, "grad_norm": 0.5170366615056992, "loss": 0.4617684930562973, "time": 1.2726133584976196, "epoch": 758, "memory": 35394, "step": 237141}
{"lr": 1.1443411930613149e-05, "data_time": 0.002628612518310547, "grad_norm": 0.4937977075576782, "loss": 0.4516401648521423, "time": 1.2361243963241577, "epoch": 758, "memory": 35394, "step": 237241}
{"lr": 1.124819490507349e-05, "data_time": 0.0020548343658447266, "grad_norm": 0.47366646826267245, "loss": 0.4555442988872528, "time": 1.236396360397339, "epoch": 759, "memory": 35394, "step": 237354}
{"lr": 1.1076827413051803e-05, "data_time": 0.0022173166275024415, "grad_norm": 0.4555966168642044, "loss": 0.4559842973947525, "time": 1.1938853502273559, "epoch": 759, "memory": 35394, "step": 237454}
{"lr": 1.0906766261909578e-05, "data_time": 0.00286717414855957, "grad_norm": 0.49826652407646177, "loss": 0.4583759009838104, "time": 1.2459460735321044, "epoch": 759, "memory": 35394, "step": 237554}
{"lr": 1.0716169630534363e-05, "data_time": 0.0021509408950805666, "grad_norm": 0.47929236590862273, "loss": 0.45843123495578764, "time": 1.2427840948104858, "epoch": 760, "memory": 35394, "step": 237667}
{"lr": 1.05488919214361e-05, "data_time": 0.002151679992675781, "grad_norm": 0.4456953495740891, "loss": 0.4593126207590103, "time": 1.197779130935669, "epoch": 760, "memory": 35394, "step": 237767}
{"lr": 1.0382921427401152e-05, "data_time": 0.002437925338745117, "grad_norm": 0.46963649392127993, "loss": 0.4482364535331726, "time": 1.4209903240203858, "epoch": 760, "memory": 35394, "step": 237867}
{"lr": 1.01969482820883e-05, "data_time": 0.0023431301116943358, "grad_norm": 0.4658662289381027, "loss": 0.4563570380210876, "time": 1.2138072013854981, "epoch": 761, "memory": 35394, "step": 237980}
{"lr": 1.0033763069527437e-05, "data_time": 0.0026917457580566406, "grad_norm": 0.4784101456403732, "loss": 0.4636663913726807, "time": 1.2085198402404784, "epoch": 761, "memory": 35394, "step": 238080}
{"lr": 9.871885925007475e-06, "data_time": 0.0024605512619018553, "grad_norm": 0.47879006862640383, "loss": 0.46331424415111544, "time": 1.1883445978164673, "epoch": 761, "memory": 35394, "step": 238180}
{"lr": 9.69053928265208e-06, "data_time": 0.0023003816604614258, "grad_norm": 0.4764758497476578, "loss": 0.45600731670856476, "time": 1.2221110105514525, "epoch": 762, "memory": 35394, "step": 238293}
{"lr": 9.531449213852813e-06, "data_time": 0.002603483200073242, "grad_norm": 0.4604395776987076, "loss": 0.4574778378009796, "time": 1.2298051118850708, "epoch": 762, "memory": 35394, "step": 238393}
{"lr": 9.37366804485241e-06, "data_time": 0.0022984743118286133, "grad_norm": 0.48440979719161986, "loss": 0.45776540637016294, "time": 1.2114455699920654, "epoch": 762, "memory": 35394, "step": 238493}
{"lr": 9.196950847297384e-06, "data_time": 0.0020860671997070313, "grad_norm": 0.42727472484111784, "loss": 0.451232385635376, "time": 1.2679205417633057, "epoch": 763, "memory": 35394, "step": 238606}
{"lr": 9.041958503051828e-06, "data_time": 0.002038860321044922, "grad_norm": 0.46919662654399874, "loss": 0.4592111766338348, "time": 1.249860119819641, "epoch": 763, "memory": 35394, "step": 238706}
{"lr": 8.88827586912985e-06, "data_time": 0.0028026819229125975, "grad_norm": 0.4674059718847275, "loss": 0.4533165514469147, "time": 1.3763860940933228, "epoch": 763, "memory": 35394, "step": 238806}
{"lr": 8.716190983118159e-06, "data_time": 0.0021255016326904297, "grad_norm": 0.4570185422897339, "loss": 0.45775056779384615, "time": 1.1869030237197875, "epoch": 764, "memory": 35394, "step": 238919}
{"lr": 8.565298877744326e-06, "data_time": 0.0022681236267089845, "grad_norm": 0.42792728543281555, "loss": 0.4579232305288315, "time": 1.317211627960205, "epoch": 764, "memory": 35394, "step": 239019}
{"lr": 8.41571727197249e-06, "data_time": 0.0023688793182373045, "grad_norm": 0.4890959471464157, "loss": 0.4626581221818924, "time": 1.283595871925354, "epoch": 764, "memory": 35394, "step": 239119}
{"lr": 8.248267489100854e-06, "data_time": 0.0025269031524658204, "grad_norm": 0.45186578035354613, "loss": 0.46151558458805086, "time": 1.2282955646514893, "epoch": 765, "memory": 35394, "step": 239232}
{"lr": 8.101478070401855e-06, "data_time": 0.002592945098876953, "grad_norm": Infinity, "loss": 0.4569170266389847, "time": 1.2043870449066163, "epoch": 765, "memory": 35394, "step": 239332}
{"lr": 7.955999919323823e-06, "data_time": 0.0023016929626464844, "grad_norm": 0.46211741864681244, "loss": 0.456260022521019, "time": 1.1717350482940674, "epoch": 765, "memory": 35394, "step": 239432}
{"lr": 7.793187955997484e-06, "data_time": 0.0020474910736083983, "grad_norm": 0.4761879086494446, "loss": 0.45934735238552094, "time": 1.1750452518463135, "epoch": 766, "memory": 35394, "step": 239545}
{"lr": 7.650503605221667e-06, "data_time": 0.0025473833084106445, "grad_norm": 0.46509272754192355, "loss": 0.4526811569929123, "time": 1.2072291851043702, "epoch": 766, "memory": 35394, "step": 239645}
{"lr": 7.50913126881429e-06, "data_time": 0.002783036231994629, "grad_norm": 0.4326241284608841, "loss": 0.460156586766243, "time": 1.2125674962997437, "epoch": 766, "memory": 35394, "step": 239745}
{"lr": 7.350959766202582e-06, "data_time": 0.002311515808105469, "grad_norm": 0.45300776064395903, "loss": 0.46163453757762907, "time": 1.2458564281463622, "epoch": 767, "memory": 35394, "step": 239858}
{"lr": 7.212382798005091e-06, "data_time": 0.0020931005477905274, "grad_norm": 0.4792204350233078, "loss": 0.4523064821958542, "time": 1.2203001260757447, "epoch": 767, "memory": 35394, "step": 239958}
{"lr": 7.0751185696397286e-06, "data_time": 0.0024380922317504884, "grad_norm": 0.48949461579322817, "loss": 0.45697333514690397, "time": 1.2672827005386353, "epoch": 767, "memory": 35394, "step": 240058}
{"lr": 6.921590093633694e-06, "data_time": 0.19116501808166503, "grad_norm": 0.46413721740245817, "loss": 0.4605990469455719, "time": 1.2259669542312621, "epoch": 768, "memory": 35394, "step": 240171}
{"lr": 6.787122756038869e-06, "data_time": 0.0022963762283325197, "grad_norm": 0.4505530118942261, "loss": 0.4578748315572739, "time": 1.2235259056091308, "epoch": 768, "memory": 35394, "step": 240271}
{"lr": 6.65396886244445e-06, "data_time": 0.002964687347412109, "grad_norm": 0.4703965067863464, "loss": 0.4554450958967209, "time": 1.3445289134979248, "epoch": 768, "memory": 35394, "step": 240371}
{"lr": 6.505085903614772e-06, "data_time": 0.0023160934448242187, "grad_norm": 0.4896960735321045, "loss": 0.45235889554023745, "time": 1.2348837852478027, "epoch": 769, "memory": 35394, "step": 240484}
{"lr": 6.3747303779796695e-06, "data_time": 0.0022276639938354492, "grad_norm": 0.4628123462200165, "loss": 0.45473507046699524, "time": 1.178087592124939, "epoch": 769, "memory": 35394, "step": 240584}
{"lr": 6.245688979206111e-06, "data_time": 0.002884960174560547, "grad_norm": 0.48979172706604, "loss": 0.45785077214241027, "time": 1.2268357515335082, "epoch": 769, "memory": 35394, "step": 240684}
{"lr": 6.101453952763134e-06, "data_time": 0.0021524429321289062, "grad_norm": 0.42882012724876406, "loss": 0.4523139506578445, "time": 1.2954868793487548, "epoch": 770, "memory": 35394, "step": 240797}
{"lr": 5.975212353742146e-06, "data_time": 0.002550816535949707, "grad_norm": 0.44505790770053866, "loss": 0.45651543140411377, "time": 1.2123514413833618, "epoch": 770, "memory": 35394, "step": 240897}
{"lr": 5.85028554312589e-06, "data_time": 0.002364516258239746, "grad_norm": 0.4554195821285248, "loss": 0.46169559061527254, "time": 1.3629374980926514, "epoch": 770, "memory": 35394, "step": 240997}
{"lr": 5.710700788880105e-06, "data_time": 0.0022966623306274413, "grad_norm": 0.48347924947738646, "loss": 0.45148820579051974, "time": 1.2661307811737061, "epoch": 771, "memory": 35394, "step": 241110}
{"lr": 5.588575164390727e-06, "data_time": 0.002322268486022949, "grad_norm": 0.4676786631345749, "loss": 0.45713887810707093, "time": 1.2476518869400024, "epoch": 771, "memory": 35394, "step": 241210}
{"lr": 5.467764968520359e-06, "data_time": 0.0025151968002319336, "grad_norm": 0.47734662890434265, "loss": 0.45218518376350403, "time": 1.2318619251251222, "epoch": 771, "memory": 35394, "step": 241310}
{"lr": 5.332832750844613e-06, "data_time": 0.0031611442565917967, "grad_norm": 0.471931928396225, "loss": 0.4597399830818176, "time": 1.211096453666687, "epoch": 772, "memory": 35394, "step": 241423}
{"lr": 5.214825082033996e-06, "data_time": 0.0024710178375244142, "grad_norm": 0.47011406123638155, "loss": 0.4578333705663681, "time": 1.2368692398071288, "epoch": 772, "memory": 35394, "step": 241523}
{"lr": 5.0981334607175625e-06, "data_time": 0.0024828672409057616, "grad_norm": 0.48104495406150816, "loss": 0.4579925209283829, "time": 1.2049071311950683, "epoch": 772, "memory": 35394, "step": 241623}
{"lr": 4.96785596851035e-06, "data_time": 0.0021487712860107423, "grad_norm": 0.44799740612506866, "loss": 0.45504570603370664, "time": 1.2032717704772948, "epoch": 773, "memory": 35394, "step": 241736}
{"lr": 4.853968169723447e-06, "data_time": 0.0024918317794799805, "grad_norm": 0.4580209106206894, "loss": 0.45266116261482237, "time": 1.197304940223694, "epoch": 773, "memory": 35394, "step": 241836}
{"lr": 4.741397015956754e-06, "data_time": 0.0023298263549804688, "grad_norm": 0.4819671005010605, "loss": 0.45871238112449647, "time": 1.2107107400894166, "epoch": 773, "memory": 35394, "step": 241936}
{"lr": 4.615776362606476e-06, "data_time": 0.08786725997924805, "grad_norm": 0.43990803956985475, "loss": 0.4539421170949936, "time": 1.2262946367263794, "epoch": 774, "memory": 35394, "step": 242049}
{"lr": 4.506010281354669e-06, "data_time": 0.002027750015258789, "grad_norm": 0.4680640548467636, "loss": 0.4567168474197388, "time": 1.2292298316955566, "epoch": 774, "memory": 35394, "step": 242149}
{"lr": 4.3975614212903635e-06, "data_time": 0.0027606725692749024, "grad_norm": 0.45237250030040743, "loss": 0.4523014187812805, "time": 1.3260164260864258, "epoch": 774, "memory": 35394, "step": 242249}
{"lr": 4.276599644641402e-06, "data_time": 0.002292966842651367, "grad_norm": 0.44279004335403443, "loss": 0.45722408294677735, "time": 1.2278416633605957, "epoch": 775, "memory": 35394, "step": 242362}
{"lr": 4.170957061572857e-06, "data_time": 0.0022223234176635743, "grad_norm": 0.4432824611663818, "loss": 0.4552129030227661, "time": 1.2450927257537843, "epoch": 775, "memory": 35394, "step": 242462}
{"lr": 4.0666322544907765e-06, "data_time": 0.0026401281356811523, "grad_norm": 0.45554190278053286, "loss": 0.45431635677814486, "time": 1.2781681537628173, "epoch": 775, "memory": 35394, "step": 242562}
{"lr": 3.950331316810187e-06, "data_time": 0.04178352355957031, "grad_norm": 0.48609547019004823, "loss": 0.4645447641611099, "time": 1.2027938604354858, "epoch": 776, "memory": 35394, "step": 242675}
{"lr": 3.848813945680969e-06, "data_time": 0.0025197982788085936, "grad_norm": 0.49265819787979126, "loss": 0.4523717433214188, "time": 1.2250449895858764, "epoch": 776, "memory": 35394, "step": 242775}
{"lr": 3.7486148839597056e-06, "data_time": 0.0022836923599243164, "grad_norm": 0.4517828345298767, "loss": 0.4627527236938477, "time": 1.1917829751968383, "epoch": 776, "memory": 35394, "step": 242875}
{"lr": 3.6369766719054285e-06, "data_time": 0.023519396781921387, "grad_norm": 0.4345546454191208, "loss": 0.4541274219751358, "time": 1.3902867078781127, "epoch": 777, "memory": 35394, "step": 242988}
{"lr": 3.5395861595512806e-06, "data_time": 0.0023573637008666992, "grad_norm": 0.4405841678380966, "loss": 0.45097137689590455, "time": 1.4963201522827148, "epoch": 777, "memory": 35394, "step": 243088}
{"lr": 3.4435144686408343e-06, "data_time": 0.0035285234451293947, "grad_norm": 0.46760224401950834, "loss": 0.45487540066242216, "time": 1.2085634231567384, "epoch": 777, "memory": 35394, "step": 243188}
{"lr": 3.3365407932312267e-06, "data_time": 0.0021447181701660157, "grad_norm": 0.4302529603242874, "loss": 0.4587853938341141, "time": 1.1918463706970215, "epoch": 778, "memory": 35394, "step": 243301}
{"lr": 3.2432787195411667e-06, "data_time": 0.002163982391357422, "grad_norm": 0.4699461877346039, "loss": 0.45444615483283995, "time": 1.3288400173187256, "epoch": 778, "memory": 35394, "step": 243401}
{"lr": 3.151335957936552e-06, "data_time": 0.003072166442871094, "grad_norm": 0.4430337756872177, "loss": 0.4562894493341446, "time": 1.2317944526672364, "epoch": 778, "memory": 35394, "step": 243501}
{"lr": 3.0490285545205475e-06, "data_time": 0.0019831418991088866, "grad_norm": 0.4651631236076355, "loss": 0.45364597737789153, "time": 1.2480713844299316, "epoch": 779, "memory": 35394, "step": 243614}
{"lr": 2.959896432411384e-06, "data_time": 0.0031415462493896485, "grad_norm": 0.46197842061519623, "loss": 0.45485938489437105, "time": 1.1635873556137084, "epoch": 779, "memory": 35394, "step": 243714}
{"lr": 2.8720840916271614e-06, "data_time": 0.0030786752700805663, "grad_norm": 0.47191864252090454, "loss": 0.4569881111383438, "time": 1.2592348337173462, "epoch": 779, "memory": 35394, "step": 243814}
{"lr": 2.7744446198568207e-06, "data_time": 0.002462482452392578, "grad_norm": 0.47226123213768006, "loss": 0.4577251851558685, "time": 1.240703535079956, "epoch": 780, "memory": 35394, "step": 243927}
{"lr": 2.689443895248388e-06, "data_time": 0.002236294746398926, "grad_norm": 0.4011823028326035, "loss": 0.4590547442436218, "time": 1.1788637161254882, "epoch": 780, "memory": 35394, "step": 244027}
{"lr": 2.6057633997944877e-06, "data_time": 0.003100085258483887, "grad_norm": 0.45880369246006014, "loss": 0.45919071435928344, "time": 1.2476716041564941, "epoch": 780, "memory": 35394, "step": 244127}
{"lr": 2.512793443597554e-06, "data_time": 0.002531123161315918, "grad_norm": 0.4701842188835144, "loss": 0.4512982040643692, "time": 1.3063141345977782, "epoch": 781, "memory": 35394, "step": 244240}
{"lr": 2.4319254953892407e-06, "data_time": 0.002183675765991211, "grad_norm": 0.47808593809604644, "loss": 0.4530068725347519, "time": 1.1766135692596436, "epoch": 781, "memory": 35394, "step": 244340}
{"lr": 2.3523782027479695e-06, "data_time": 0.002337026596069336, "grad_norm": 0.4532101571559906, "loss": 0.4624639362096786, "time": 1.2430482864379884, "epoch": 781, "memory": 35394, "step": 244440}
{"lr": 2.2640792703023836e-06, "data_time": 0.002145123481750488, "grad_norm": 0.460508731007576, "loss": 0.46054229736328123, "time": 1.2564297914505005, "epoch": 782, "memory": 35394, "step": 244553}
{"lr": 2.187345410350773e-06, "data_time": 0.00243680477142334, "grad_norm": 0.4403365731239319, "loss": 0.4584758818149567, "time": 1.2137657880783081, "epoch": 782, "memory": 35394, "step": 244653}
{"lr": 2.111932610954955e-06, "data_time": 0.002274227142333984, "grad_norm": 0.46226705312728883, "loss": 0.4555306077003479, "time": 1.233370852470398, "epoch": 782, "memory": 35394, "step": 244753}
{"lr": 2.028306134664329e-06, "data_time": 0.002026534080505371, "grad_norm": 0.5059593260288239, "loss": 0.456840968132019, "time": 1.2043344259262085, "epoch": 783, "memory": 35394, "step": 244866}
{"lr": 1.955707607761957e-06, "data_time": 0.002516627311706543, "grad_norm": 0.421297562122345, "loss": 0.45727035105228425, "time": 1.2363361597061158, "epoch": 783, "memory": 35394, "step": 244966}
{"lr": 1.884430524973628e-06, "data_time": 0.0024125337600708007, "grad_norm": 0.447797966003418, "loss": 0.45221089720726015, "time": 1.2602375745773315, "epoch": 783, "memory": 35394, "step": 245066}
{"lr": 1.8054778614439928e-06, "data_time": 0.002105093002319336, "grad_norm": 0.42175526916980743, "loss": 0.46371078193187715, "time": 1.3204092502593994, "epoch": 784, "memory": 35394, "step": 245179}
{"lr": 1.7370158452992782e-06, "data_time": 0.0026514291763305663, "grad_norm": 0.4713737010955811, "loss": 0.4607467085123062, "time": 1.2042938709259032, "epoch": 784, "memory": 35394, "step": 245279}
{"lr": 1.6698756353901401e-06, "data_time": 0.003294515609741211, "grad_norm": 0.4213728070259094, "loss": 0.46141262650489806, "time": 1.2433600902557373, "epoch": 784, "memory": 35394, "step": 245379}
{"lr": 1.595598065407997e-06, "data_time": 0.07996113300323486, "grad_norm": 0.4441481053829193, "loss": 0.4596773833036423, "time": 1.256958532333374, "epoch": 785, "memory": 35394, "step": 245492}
{"lr": 1.5312736706259705e-06, "data_time": 0.0021933794021606447, "grad_norm": 0.449671334028244, "loss": 0.45742085874080657, "time": 1.2624894618988036, "epoch": 785, "memory": 35394, "step": 245592}
{"lr": 1.4682714227583817e-06, "data_time": 0.003201127052307129, "grad_norm": 0.44461160004138944, "loss": 0.45918899178504946, "time": 1.2529770135879517, "epoch": 785, "memory": 35394, "step": 245692}
{"lr": 1.3986701512698254e-06, "data_time": 0.0025038957595825196, "grad_norm": 0.47080811858177185, "loss": 0.45409044325351716, "time": 1.2505369663238526, "epoch": 786, "memory": 35394, "step": 245805}
{"lr": 1.3384844213342732e-06, "data_time": 0.0024181365966796874, "grad_norm": 0.46610202491283415, "loss": 0.45672057271003724, "time": 1.3031511545181274, "epoch": 786, "memory": 35394, "step": 245905}
{"lr": 1.2796211575436696e-06, "data_time": 0.003111100196838379, "grad_norm": 0.47030642032623293, "loss": 0.4558993011713028, "time": 1.4049696445465087, "epoch": 786, "memory": 35394, "step": 246005}
{"lr": 1.2146973136350364e-06, "data_time": 0.07558591365814209, "grad_norm": 0.45953467190265657, "loss": 0.46130265593528746, "time": 1.2093215703964233, "epoch": 787, "memory": 35394, "step": 246118}
{"lr": 1.1586512248913646e-06, "data_time": 0.0022574901580810548, "grad_norm": 0.4316775679588318, "loss": 0.45754713416099546, "time": 1.2784058570861816, "epoch": 787, "memory": 35394, "step": 246218}
{"lr": 1.1039279000699102e-06, "data_time": 0.0030145883560180665, "grad_norm": 0.4719018280506134, "loss": 0.45377394556999207, "time": 1.2175618886947632, "epoch": 787, "memory": 35394, "step": 246318}
{"lr": 1.0436825369493016e-06, "data_time": 0.0024311304092407226, "grad_norm": 0.4390616178512573, "loss": 0.45856316685676574, "time": 1.2195712089538575, "epoch": 788, "memory": 35394, "step": 246431}
{"lr": 9.917769985885428e-07, "data_time": 0.002134394645690918, "grad_norm": 0.46359516084194186, "loss": 0.45999531745910643, "time": 1.2706112623214723, "epoch": 788, "memory": 35394, "step": 246531}
{"lr": 9.411945004694131e-07, "data_time": 0.002785921096801758, "grad_norm": 0.4526266545057297, "loss": 0.45277199149131775, "time": 1.2403903245925902, "epoch": 788, "memory": 35394, "step": 246631}
{"lr": 8.85628595449547e-07, "data_time": 0.0022148370742797853, "grad_norm": 0.44270247519016265, "loss": 0.45692242980003356, "time": 1.2918251514434815, "epoch": 789, "memory": 35394, "step": 246744}
{"lr": 8.378644494944202e-07, "data_time": 0.0030910730361938476, "grad_norm": 0.46406300365924835, "loss": 0.4526375085115433, "time": 1.2413436651229859, "epoch": 789, "memory": 35394, "step": 246844}
{"lr": 7.914235986373255e-07, "data_time": 0.0023664236068725586, "grad_norm": 0.45360417366027833, "loss": 0.44677814841270447, "time": 1.2280695915222168, "epoch": 789, "memory": 35394, "step": 246944}
{"lr": 7.405380531198212e-07, "data_time": 0.0021975040435791016, "grad_norm": 0.4457146942615509, "loss": 0.46091421246528624, "time": 1.198221755027771, "epoch": 790, "memory": 35394, "step": 247057}
{"lr": 6.969160744100739e-07, "data_time": 0.0023314714431762694, "grad_norm": 0.4522430658340454, "loss": 0.4553385257720947, "time": 1.2199033737182616, "epoch": 790, "memory": 35394, "step": 247157}
{"lr": 6.546176241881997e-07, "data_time": 0.0022869348526000977, "grad_norm": 0.44727493822574615, "loss": 0.4605765700340271, "time": 1.2191221237182617, "epoch": 790, "memory": 35394, "step": 247257}
{"lr": 6.084132636490108e-07, "data_time": 0.0022689104080200195, "grad_norm": 0.46824481189250944, "loss": 0.45505719184875487, "time": 1.2559077262878418, "epoch": 791, "memory": 35394, "step": 247370}
{"lr": 5.689341598293423e-07, "data_time": 0.002199554443359375, "grad_norm": 0.40616694688796995, "loss": 0.46164270043373107, "time": 1.2462112665176392, "epoch": 791, "memory": 35394, "step": 247470}
{"lr": 5.307787964169172e-07, "data_time": 0.002535891532897949, "grad_norm": 0.47088162899017333, "loss": 0.46148563027381895, "time": 1.2089017391204835, "epoch": 791, "memory": 35394, "step": 247570}
{"lr": 4.892563703927444e-07, "data_time": 0.002397751808166504, "grad_norm": 0.4619724273681641, "loss": 0.45506454408168795, "time": 1.221636700630188, "epoch": 792, "memory": 35394, "step": 247683}
{"lr": 4.5392078190152253e-07, "data_time": 0.0023115873336791992, "grad_norm": 0.4637978732585907, "loss": 0.4588434636592865, "time": 1.1968657493591308, "epoch": 792, "memory": 35394, "step": 247783}
{"lr": 4.1990912426271516e-07, "data_time": 0.0035735607147216798, "grad_norm": 0.44136805534362794, "loss": 0.45639442205429076, "time": 1.2154022693634032, "epoch": 792, "memory": 35394, "step": 247883}
{"lr": 3.830693063390125e-07, "data_time": 0.0022624969482421876, "grad_norm": 0.40869236886501314, "loss": 0.45822949707508087, "time": 1.2065712213516235, "epoch": 793, "memory": 35394, "step": 247996}
{"lr": 3.5187780639735245e-07, "data_time": 0.0021224737167358397, "grad_norm": 0.45536438524723055, "loss": 0.4554630845785141, "time": 1.2089356184005737, "epoch": 793, "memory": 35394, "step": 248096}
{"lr": 3.220104062764128e-07, "data_time": 0.0033193349838256834, "grad_norm": 0.4312094211578369, "loss": 0.4575344443321228, "time": 1.204163670539856, "epoch": 793, "memory": 35394, "step": 248196}
{"lr": 2.8985379407628587e-07, "data_time": 0.002152752876281738, "grad_norm": 0.46006512343883516, "loss": 0.4599510759115219, "time": 1.2262983083724976, "epoch": 794, "memory": 35394, "step": 248309}
{"lr": 2.628068886793447e-07, "data_time": 0.0025472640991210938, "grad_norm": 0.41309713423252103, "loss": 0.4504065871238708, "time": 1.1783966064453124, "epoch": 794, "memory": 35394, "step": 248409}
{"lr": 2.3708423059127913e-07, "data_time": 0.0029826641082763674, "grad_norm": 0.4450812667608261, "loss": 0.4636229932308197, "time": 1.2633219480514526, "epoch": 794, "memory": 35394, "step": 248509}
{"lr": 2.096113457659848e-07, "data_time": 0.0022403240203857423, "grad_norm": 0.4949064701795578, "loss": 0.4576050043106079, "time": 1.283603549003601, "epoch": 795, "memory": 35394, "step": 248622}
{"lr": 1.867094736741685e-07, "data_time": 0.0026101589202880858, "grad_norm": 0.43979112803936005, "loss": 0.45615373849868773, "time": 1.2121922492980957, "epoch": 795, "memory": 35394, "step": 248722}
{"lr": 1.6513197489736675e-07, "data_time": 0.0031782865524291994, "grad_norm": 0.4418237298727036, "loss": 0.45279367864131925, "time": 1.2361603498458862, "epoch": 795, "memory": 35394, "step": 248822}
{"lr": 1.4234326311778207e-07, "data_time": 0.0022954940795898438, "grad_norm": 0.42461791038513186, "loss": 0.4586110353469849, "time": 1.243241024017334, "epoch": 796, "memory": 35394, "step": 248935}
{"lr": 1.2358679584999516e-07, "data_time": 0.002677583694458008, "grad_norm": 0.4879525423049927, "loss": 0.45122780799865725, "time": 1.1669663190841675, "epoch": 796, "memory": 35394, "step": 249035}
{"lr": 1.0615480641921387e-07, "data_time": 0.002880358695983887, "grad_norm": 0.4159923166036606, "loss": 0.45705786049366, "time": 1.2159512996673585, "epoch": 796, "memory": 35394, "step": 249135}
{"lr": 8.805063736846768e-08, "data_time": 0.0023324012756347655, "grad_norm": 0.46014591455459597, "loss": 0.4561008185148239, "time": 1.249536943435669, "epoch": 797, "memory": 35394, "step": 249248}
{"lr": 7.343987919607363e-08, "data_time": 0.0023945331573486327, "grad_norm": 0.42066307961940763, "loss": 0.4550947189331055, "time": 1.5150959253311158, "epoch": 797, "memory": 35394, "step": 249348}
{"lr": 6.015368189657455e-08, "data_time": 0.0027132511138916017, "grad_norm": 0.4351486325263977, "loss": 0.4651791572570801, "time": 1.2275962591171266, "epoch": 797, "memory": 35394, "step": 249448}
{"lr": 4.6734349264272846e-08, "data_time": 0.002067160606384277, "grad_norm": 0.4567737698554993, "loss": 0.4507841795682907, "time": 1.3016884326934814, "epoch": 798, "memory": 35394, "step": 249561}
{"lr": 3.6269537205943125e-08, "data_time": 0.0027469635009765626, "grad_norm": 0.45248425006866455, "loss": 0.4633345365524292, "time": 1.33692524433136, "epoch": 798, "memory": 35394, "step": 249661}
{"lr": 2.7129347569230075e-08, "data_time": 0.002409839630126953, "grad_norm": 0.4211717575788498, "loss": 0.45732645988464354, "time": 1.2458019256591797, "epoch": 798, "memory": 35394, "step": 249761}
{"lr": 1.839506904657122e-08, "data_time": 0.0029026269912719727, "grad_norm": 0.46024511754512787, "loss": 0.46057466268539426, "time": 1.2560510873794555, "epoch": 799, "memory": 35394, "step": 249874}
{"lr": 1.2076372864732218e-08, "data_time": 0.0024103403091430666, "grad_norm": 0.4475048273801804, "loss": 0.4574493199586868, "time": 1.2390231847763062, "epoch": 799, "memory": 35394, "step": 249974}
{"lr": 7.0823391648205155e-09, "data_time": 0.0028716564178466798, "grad_norm": 0.45238871276378634, "loss": 0.463871967792511, "time": 1.2357147455215454, "epoch": 799, "memory": 35394, "step": 250074}
{"lr": 3.0332564411314534e-09, "data_time": 0.002594780921936035, "grad_norm": 0.4378443717956543, "loss": 0.4559149831533432, "time": 1.2693604230880737, "epoch": 800, "memory": 35394, "step": 250187}
{"lr": 8.607786389446587e-10, "data_time": 0.002192521095275879, "grad_norm": 0.4334086537361145, "loss": 0.4526620000600815, "time": 1.1906485557556152, "epoch": 800, "memory": 35394, "step": 250287}
{"lr": 1.2981890051831862e-11, "data_time": 0.002639150619506836, "grad_norm": 0.4757295101881027, "loss": 0.4567994445562363, "time": 1.1930061340332032, "epoch": 800, "memory": 35394, "step": 250387}
